/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stringpool.h"
#include "attribs.h"
#include "calls.h"
#include "stor-layout.h"
#include "varasm.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "hashtab.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "hash-map.h"
#include "is-a.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "gimplify.h"
#include "cfgloop.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "opts.h"
#include "diagnostic.h"
#include "dumpfile.h"
#include "tree-pass.h"
#include "context.h"
#include "pass_manager.h"
#include "target-globals.h"
#include "tree-vectorizer.h"
#include "shrink-wrap.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tree-iterator.h"
#include "tree-chkp.h"
#include "rtl-chkp.h"

static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static rtx legitimize_pe_coff_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)					\
  ((mode) == QImode ? 0						\
   : (mode) == HImode ? 1					\
   : (mode) == SImode ? 2					\
   : (mode) == DImode ? 3					\
   : 4)
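
/* Illustrative sketch, assuming the mult_init and divide field names
   declared for struct processor_costs in i386.h: MODE_INDEX picks the
   per-mode slot in those five-element arrays, e.g.

     int mul_cost = cost->mult_init[MODE_INDEX (SImode)];

   selects slot 2, while any mode other than QImode, HImode, SImode and
   DImode falls through to slot 4, the "other" entry.  */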

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
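
/* Illustrative arithmetic: with COSTS_N_INSNS (N) == (N) * 4 and
   COSTS_N_BYTES (N) == (N) * 2, a two-byte add costs
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), keeping the size-tuned
   table below on the same scale as the speed-tuned tables.  */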

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
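
/* A sketch of how to read the stringop_algs initializers below, assuming
   the layout declared in i386.h: the first member names the algorithm
   used for blocks of unknown size; it is followed by {max_size,
   algorithm, noalign} entries, where max_size == -1 marks the catch-all
   entry for all larger blocks.  Element [0] of each pair of tables
   describes 32-bit code and element [1] 64-bit code; DUMMY_STRINGOP_ALGS
   fills the 64-bit slot for processors that never run 64-bit code.  */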

static stringop_algs ix86_size_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
static stringop_algs ix86_size_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};

const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/*				 HI */
   COSTS_N_BYTES (3),			/*				 SI */
   COSTS_N_BYTES (3),			/*				 DI */
   COSTS_N_BYTES (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/*			    HI */
   COSTS_N_BYTES (3),			/*			    SI */
   COSTS_N_BYTES (3),			/*			    DI */
   COSTS_N_BYTES (5)},			/*			    other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,				     /* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache  */
  0,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
  ix86_size_memcpy,
  ix86_size_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  1,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
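
/* Editorial note, a sketch rather than a definitive description: the
   size-tuned table above is the one consulted when a context is not
   being optimized for speed, along the lines of

     const struct processor_costs *cost
       = speed ? ix86_cost : &ix86_size_cost;

   in the rtx-cost computation later in this file.  The trailing scalar,
   vector and branch cost fields of every table feed the vectorizer cost
   hook (ix86_builtin_vectorization_cost).  */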

/* Processor costs (relative to an add) */
static stringop_algs i386_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs i386_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};

static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/*				 HI */
   COSTS_N_INSNS (6),			/*				 SI */
   COSTS_N_INSNS (6),			/*				 DI */
   COSTS_N_INSNS (6)},			/*			      other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/*			    HI */
   COSTS_N_INSNS (23),			/*			    SI */
   COSTS_N_INSNS (23),			/*			    DI */
   COSTS_N_INSNS (23)},			/*			    other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,				     /* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache  */
  0,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
  i386_memcpy,
  i386_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static stringop_algs i486_memcpy[2] = {
  {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs i486_memset[2] = {
  {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/*				 HI */
   COSTS_N_INSNS (12),			/*				 SI */
   COSTS_N_INSNS (12),			/*				 DI */
   COSTS_N_INSNS (12)},			/*			      other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/*			    HI */
   COSTS_N_INSNS (40),			/*			    SI */
   COSTS_N_INSNS (40),			/*			    DI */
   COSTS_N_INSNS (40)},			/*			    other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,				     /* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  4,					/* size of l1 cache.  486 has 8kB cache
					   shared for code and data, so 4kB is
					   not really precise.  */
  4,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
  i486_memcpy,
  i486_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static stringop_algs pentium_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentium_memset[2] = {
  {libcall, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
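
/* Illustrative reading of pentium_memcpy above, under the stringop_algs
   layout sketched earlier: blocks of unknown size go straight to the
   memcpy libcall; known sizes up to 256 bytes use rep movsl
   (rep_prefix_4_byte); larger known sizes also end up in the libcall.
   The 64-bit half is DUMMY_STRINGOP_ALGS since the Pentium never runs
   64-bit code.  */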

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/*				 HI */
   COSTS_N_INSNS (11),			/*				 SI */
   COSTS_N_INSNS (11),			/*				 DI */
   COSTS_N_INSNS (11)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/*			    HI */
   COSTS_N_INSNS (25),			/*			    SI */
   COSTS_N_INSNS (25),			/*			    DI */
   COSTS_N_INSNS (25)},			/*			    other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,				     /* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  8,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
  pentium_memcpy,
  pentium_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

/* The PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
   (we ensure the alignment).  For small blocks an inline loop is still a
   noticeable win; for bigger blocks either rep movsl or rep movsb is the
   way to go.  Rep movsb apparently has a more expensive startup time in the
   CPU, but after 4K the difference is down in the noise.  */
static stringop_algs pentiumpro_memcpy[2] = {
  {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
                       {8192, rep_prefix_4_byte, false},
                       {-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentiumpro_memset[2] = {
  {rep_prefix_4_byte, {{1024, unrolled_loop, false},
                       {8192, rep_prefix_4_byte, false},
                       {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (4),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (4)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/*			    HI */
   COSTS_N_INSNS (17),			/*			    SI */
   COSTS_N_INSNS (17),			/*			    DI */
   COSTS_N_INSNS (17)},			/*			    other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,				     /* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache  */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  pentiumpro_memcpy,
  pentiumpro_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static stringop_algs geode_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs geode_memset[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (7),			/*				 SI */
   COSTS_N_INSNS (7),			/*				 DI */
   COSTS_N_INSNS (7)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/*			    HI */
   COSTS_N_INSNS (39),			/*			    SI */
   COSTS_N_INSNS (39),			/*			    DI */
   COSTS_N_INSNS (39)},			/*			    other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  1,				     /* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */

  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  128,					/* size of l2 cache.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
  geode_memcpy,
  geode_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static stringop_algs k6_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs k6_memset[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (3),			/*				 DI */
   COSTS_N_INSNS (3)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/*			    HI */
   COSTS_N_INSNS (18),			/*			    SI */
   COSTS_N_INSNS (18),			/*			    DI */
   COSTS_N_INSNS (18)},			/*			    other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,				     /* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  32,					/* size of l2 cache.  Some models
					   have integrated l2 cache, but
					   optimizing for k6 is not important
					   enough to worry about that.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  k6_memcpy,
  k6_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

/* For some reason, the Athlon deals better with the REP prefix (relative to
   loops) than the K8 does.  Alignment becomes important after 8 bytes for
   memcpy and 128 bytes for memset.  */
static stringop_algs athlon_memcpy[2] = {
  {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs athlon_memset[2] = {
  {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/*				 HI */
   COSTS_N_INSNS (5),			/*				 SI */
   COSTS_N_INSNS (5),			/*				 DI */
   COSTS_N_INSNS (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*			    HI */
   COSTS_N_INSNS (42),			/*			    SI */
   COSTS_N_INSNS (74),			/*			    DI */
   COSTS_N_INSNS (74)},			/*			    other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,				     /* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  5,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  athlon_memcpy,
  athlon_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

/* The K8 has an optimized REP instruction for medium-sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, a libcall
   can do nontemporal accesses and beat inline code considerably.  */
static stringop_algs k8_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs k8_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*			    HI */
   COSTS_N_INSNS (42),			/*			    SI */
   COSTS_N_INSNS (74),			/*			    DI */
   COSTS_N_INSNS (74)},			/*			    other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,				     /* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea to leave the number of prefetches entirely unlimited, as
     their execution also takes some time).  */
  100,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  k8_memcpy,
  k8_memset,
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar_load_cost.  */
  2,					/* scalar_store_cost.  */
  5,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  3,					/* vec_unalign_load_cost.  */
  3,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  2,					/* cond_not_taken_branch_cost.  */
};

/* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, a libcall
   can do nontemporal accesses and beat inline code considerably.  */
static stringop_algs amdfam10_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs amdfam10_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*			    HI */
   COSTS_N_INSNS (51),			/*			    SI */
   COSTS_N_INSNS (83),			/*			    DI */
   COSTS_N_INSNS (83)},			/*			    other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,				     /* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  					/* On K8:
  					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea to leave the number of prefetches entirely unlimited, as
     their execution also takes some time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  amdfam10_memcpy,
  amdfam10_memset,
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar_load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

/* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, a libcall
   can do nontemporal accesses and beat inline code considerably.  */
static stringop_algs bdver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};

const struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (4),			/*				 SI */
   COSTS_N_INSNS (6),			/*				 DI */
   COSTS_N_INSNS (6)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*			    HI */
   COSTS_N_INSNS (51),			/*			    SI */
   COSTS_N_INSNS (83),			/*			    DI */
   COSTS_N_INSNS (83)},			/*			    other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,				     /* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea to leave the number of prefetches entirely unlimited, as
     their execution also takes some time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */

  bdver1_memcpy,
  bdver1_memset,
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar_load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

/* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, a libcall
   can do nontemporal accesses and beat inline code considerably.  */

static stringop_algs bdver2_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver2_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};

const struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (4),			/*				 SI */
   COSTS_N_INSNS (6),			/*				 DI */
   COSTS_N_INSNS (6)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*			    HI */
   COSTS_N_INSNS (51),			/*			    SI */
   COSTS_N_INSNS (83),			/*			    DI */
   COSTS_N_INSNS (83)},			/*			    other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,				     /* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea to leave the number of prefetches entirely unlimited, as
     their execution also takes some time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */

  bdver2_memcpy,
  bdver2_memset,
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar_load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

/* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, a libcall
   can do nontemporal accesses and beat inline code considerably.  */
static stringop_algs bdver3_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver3_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs bdver3_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (4),			/*				 SI */
   COSTS_N_INSNS (6),			/*				 DI */
   COSTS_N_INSNS (6)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*			    HI */
   COSTS_N_INSNS (51),			/*			    SI */
   COSTS_N_INSNS (83),			/*			    DI */
   COSTS_N_INSNS (83)},			/*			    other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,				     /* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea to leave the number of prefetches entirely unlimited, as
     their execution also takes some time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */

  bdver3_memcpy,
  bdver3_memset,
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar_load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
1215 
1216 /*  BDVER4 has optimized REP instruction for medium sized blocks, but for
1217     very small blocks it is better to use loop. For large blocks, libcall
1218     can do nontemporary accesses and beat inline considerably.  */
1219 static stringop_algs bdver4_memcpy[2] = {
1220   {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1221              {-1, rep_prefix_4_byte, false}}},
1222   {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1223              {-1, libcall, false}}}};
1224 static stringop_algs bdver4_memset[2] = {
1225   {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1226              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1227   {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1228              {-1, libcall, false}}}};
1229 struct processor_costs bdver4_cost = {
1230   COSTS_N_INSNS (1),			/* cost of an add instruction */
1231   COSTS_N_INSNS (1),			/* cost of a lea instruction */
1232   COSTS_N_INSNS (1),			/* variable shift costs */
1233   COSTS_N_INSNS (1),			/* constant shift costs */
1234   {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
1235    COSTS_N_INSNS (4),			/*				 HI */
1236    COSTS_N_INSNS (4),			/*				 SI */
1237    COSTS_N_INSNS (6),			/*				 DI */
1238    COSTS_N_INSNS (6)},			/*			      other */
1239   0,					/* cost of multiply per each bit set */
1240   {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
1241    COSTS_N_INSNS (35),			/*			    HI */
1242    COSTS_N_INSNS (51),			/*			    SI */
1243    COSTS_N_INSNS (83),			/*			    DI */
1244    COSTS_N_INSNS (83)},			/*			    other */
1245   COSTS_N_INSNS (1),			/* cost of movsx */
1246   COSTS_N_INSNS (1),			/* cost of movzx */
1247   8,					/* "large" insn */
1248   9,					/* MOVE_RATIO */
1249   4,				     /* cost for loading QImode using movzbl */
1250   {5, 5, 4},				/* cost of loading integer registers
1251 					   in QImode, HImode and SImode.
1252 					   Relative to reg-reg move (2).  */
1253   {4, 4, 4},				/* cost of storing integer registers */
1254   2,					/* cost of reg,reg fld/fst */
1255   {5, 5, 12},				/* cost of loading fp registers
1256 		   			   in SFmode, DFmode and XFmode */
1257   {4, 4, 8},				/* cost of storing fp registers
1258  		   			   in SFmode, DFmode and XFmode */
1259   2,					/* cost of moving MMX register */
1260   {4, 4},				/* cost of loading MMX registers
1261 					   in SImode and DImode */
1262   {4, 4},				/* cost of storing MMX registers
1263 					   in SImode and DImode */
1264   2,					/* cost of moving SSE register */
1265   {4, 4, 4},				/* cost of loading SSE registers
1266 					   in SImode, DImode and TImode */
1267   {4, 4, 4},				/* cost of storing SSE registers
1268 					   in SImode, DImode and TImode */
1269   2,					/* MMX or SSE register to integer */
1270   16,					/* size of l1 cache.  */
1271   2048,					/* size of l2 cache.  */
1272   64,					/* size of prefetch block */
1273   /* New AMD processors never drop prefetches; if they cannot be performed
1274      immediately, they are queued.  We set the number of simultaneous
1275      prefetches to a large constant to reflect this (it is probably not a
1276      good idea to leave the number of prefetches entirely unlimited, as
1277      their execution also takes some time).  */
1278   100,					/* number of parallel prefetches */
1279   2,					/* Branch cost */
1280   COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
1281   COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
1282   COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
1283   COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
1284   COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
1285   COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */
1286 
1287   bdver4_memcpy,
1288   bdver4_memset,
1289   6,					/* scalar_stmt_cost.  */
1290   4,					/* scalar_load_cost.  */
1291   4,					/* scalar_store_cost.  */
1292   6,					/* vec_stmt_cost.  */
1293   0,					/* vec_to_scalar_cost.  */
1294   2,					/* scalar_to_vec_cost.  */
1295   4,					/* vec_align_load_cost.  */
1296   4,					/* vec_unalign_load_cost.  */
1297   4,					/* vec_store_cost.  */
1298   2,					/* cond_taken_branch_cost.  */
1299   1,					/* cond_not_taken_branch_cost.  */
1300 };
1301 
1302 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1303    very small blocks it is better to use a loop.  For large blocks, a libcall
1304    can do nontemporal accesses and beat inline expansion considerably.  */
1305 static stringop_algs btver1_memcpy[2] = {
1306   {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1307              {-1, rep_prefix_4_byte, false}}},
1308   {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1309              {-1, libcall, false}}}};
1310 static stringop_algs btver1_memset[2] = {
1311   {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1312              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1313   {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1314              {-1, libcall, false}}}};
1315 const struct processor_costs btver1_cost = {
1316   COSTS_N_INSNS (1),			/* cost of an add instruction */
1317   COSTS_N_INSNS (2),			/* cost of a lea instruction */
1318   COSTS_N_INSNS (1),			/* variable shift costs */
1319   COSTS_N_INSNS (1),			/* constant shift costs */
1320   {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
1321    COSTS_N_INSNS (4),			/*				 HI */
1322    COSTS_N_INSNS (3),			/*				 SI */
1323    COSTS_N_INSNS (4),			/*				 DI */
1324    COSTS_N_INSNS (5)},			/*			      other */
1325   0,					/* cost of multiply per each bit set */
1326   {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
1327    COSTS_N_INSNS (35),			/*			    HI */
1328    COSTS_N_INSNS (51),			/*			    SI */
1329    COSTS_N_INSNS (83),			/*			    DI */
1330    COSTS_N_INSNS (83)},			/*			    other */
1331   COSTS_N_INSNS (1),			/* cost of movsx */
1332   COSTS_N_INSNS (1),			/* cost of movzx */
1333   8,					/* "large" insn */
1334   9,					/* MOVE_RATIO */
1335   4,				     /* cost for loading QImode using movzbl */
1336   {3, 4, 3},				/* cost of loading integer registers
1337 					   in QImode, HImode and SImode.
1338 					   Relative to reg-reg move (2).  */
1339   {3, 4, 3},				/* cost of storing integer registers */
1340   4,					/* cost of reg,reg fld/fst */
1341   {4, 4, 12},				/* cost of loading fp registers
1342 					   in SFmode, DFmode and XFmode */
1343   {6, 6, 8},				/* cost of storing fp registers
1344 					   in SFmode, DFmode and XFmode */
1345   2,					/* cost of moving MMX register */
1346   {3, 3},				/* cost of loading MMX registers
1347 					   in SImode and DImode */
1348   {4, 4},				/* cost of storing MMX registers
1349 					   in SImode and DImode */
1350   2,					/* cost of moving SSE register */
1351   {4, 4, 3},				/* cost of loading SSE registers
1352 					   in SImode, DImode and TImode */
1353   {4, 4, 5},				/* cost of storing SSE registers
1354 					   in SImode, DImode and TImode */
1355   3,					/* MMX or SSE register to integer */
1356 					/* On K8:
1357 					   MOVD reg64, xmmreg Double FSTORE 4
1358 					   MOVD reg32, xmmreg Double FSTORE 4
1359 					   On AMDFAM10:
1360 					   MOVD reg64, xmmreg Double FADD 3
1361 							       1/1  1/1
1362 					    MOVD reg32, xmmreg Double FADD 3
1363 							       1/1  1/1 */
1364   32,					/* size of l1 cache.  */
1365   512,					/* size of l2 cache.  */
1366   64,					/* size of prefetch block */
1367   100,					/* number of parallel prefetches */
1368   2,					/* Branch cost */
1369   COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
1370   COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
1371   COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
1372   COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
1373   COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
1374   COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
1375 
1376   btver1_memcpy,
1377   btver1_memset,
1378   4,					/* scalar_stmt_cost.  */
1379   2,					/* scalar_load_cost.  */
1380   2,					/* scalar_store_cost.  */
1381   6,					/* vec_stmt_cost.  */
1382   0,					/* vec_to_scalar_cost.  */
1383   2,					/* scalar_to_vec_cost.  */
1384   2,					/* vec_align_load_cost.  */
1385   2,					/* vec_unalign_load_cost.  */
1386   2,					/* vec_store_cost.  */
1387   2,					/* cond_taken_branch_cost.  */
1388   1,					/* cond_not_taken_branch_cost.  */
1389 };
1390 
1391 static stringop_algs btver2_memcpy[2] = {
1392   {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1393              {-1, rep_prefix_4_byte, false}}},
1394   {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1395              {-1, libcall, false}}}};
1396 static stringop_algs btver2_memset[2] = {
1397   {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1398              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1399   {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1400              {-1, libcall, false}}}};
1401 const struct processor_costs btver2_cost = {
1402   COSTS_N_INSNS (1),			/* cost of an add instruction */
1403   COSTS_N_INSNS (2),			/* cost of a lea instruction */
1404   COSTS_N_INSNS (1),			/* variable shift costs */
1405   COSTS_N_INSNS (1),			/* constant shift costs */
1406   {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
1407    COSTS_N_INSNS (4),			/*				 HI */
1408    COSTS_N_INSNS (3),			/*				 SI */
1409    COSTS_N_INSNS (4),			/*				 DI */
1410    COSTS_N_INSNS (5)},			/*			      other */
1411   0,					/* cost of multiply per each bit set */
1412   {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
1413    COSTS_N_INSNS (35),			/*			    HI */
1414    COSTS_N_INSNS (51),			/*			    SI */
1415    COSTS_N_INSNS (83),			/*			    DI */
1416    COSTS_N_INSNS (83)},			/*			    other */
1417   COSTS_N_INSNS (1),			/* cost of movsx */
1418   COSTS_N_INSNS (1),			/* cost of movzx */
1419   8,					/* "large" insn */
1420   9,					/* MOVE_RATIO */
1421   4,				     /* cost for loading QImode using movzbl */
1422   {3, 4, 3},				/* cost of loading integer registers
1423 					   in QImode, HImode and SImode.
1424 					   Relative to reg-reg move (2).  */
1425   {3, 4, 3},				/* cost of storing integer registers */
1426   4,					/* cost of reg,reg fld/fst */
1427   {4, 4, 12},				/* cost of loading fp registers
1428 					   in SFmode, DFmode and XFmode */
1429   {6, 6, 8},				/* cost of storing fp registers
1430 					   in SFmode, DFmode and XFmode */
1431   2,					/* cost of moving MMX register */
1432   {3, 3},				/* cost of loading MMX registers
1433 					   in SImode and DImode */
1434   {4, 4},				/* cost of storing MMX registers
1435 					   in SImode and DImode */
1436   2,					/* cost of moving SSE register */
1437   {4, 4, 3},				/* cost of loading SSE registers
1438 					   in SImode, DImode and TImode */
1439   {4, 4, 5},				/* cost of storing SSE registers
1440 					   in SImode, DImode and TImode */
1441   3,					/* MMX or SSE register to integer */
1442 					/* On K8:
1443 					   MOVD reg64, xmmreg Double FSTORE 4
1444 					   MOVD reg32, xmmreg Double FSTORE 4
1445 					   On AMDFAM10:
1446 					   MOVD reg64, xmmreg Double FADD 3
1447 							       1/1  1/1
1448 					    MOVD reg32, xmmreg Double FADD 3
1449 							       1/1  1/1 */
1450   32,					/* size of l1 cache.  */
1451   2048,					/* size of l2 cache.  */
1452   64,					/* size of prefetch block */
1453   100,					/* number of parallel prefetches */
1454   2,					/* Branch cost */
1455   COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
1456   COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
1457   COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
1458   COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
1459   COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
1460   COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
1461   btver2_memcpy,
1462   btver2_memset,
1463   4,					/* scalar_stmt_cost.  */
1464   2,					/* scalar_load_cost.  */
1465   2,					/* scalar_store_cost.  */
1466   6,					/* vec_stmt_cost.  */
1467   0,					/* vec_to_scalar_cost.  */
1468   2,					/* scalar_to_vec_cost.  */
1469   2,					/* vec_align_load_cost.  */
1470   2,					/* vec_unalign_load_cost.  */
1471   2,					/* vec_store_cost.  */
1472   2,					/* cond_taken_branch_cost.  */
1473   1,					/* cond_not_taken_branch_cost.  */
1474 };
1475 
1476 static stringop_algs pentium4_memcpy[2] = {
1477   {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1478   DUMMY_STRINGOP_ALGS};
1479 static stringop_algs pentium4_memset[2] = {
1480   {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1481              {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1482   DUMMY_STRINGOP_ALGS};
1483 
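/* DUMMY_STRINGOP_ALGS (defined earlier in this file, roughly a single
   catch-all libcall entry) fills the 64-bit slot here: in this table's
   scheme the pentium4 tuning is 32-bit only, with its 64-bit sibling
   tuned as nocona below, so that slot is never consulted.  */
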
1484 static const
1485 struct processor_costs pentium4_cost = {
1486   COSTS_N_INSNS (1),			/* cost of an add instruction */
1487   COSTS_N_INSNS (3),			/* cost of a lea instruction */
1488   COSTS_N_INSNS (4),			/* variable shift costs */
1489   COSTS_N_INSNS (4),			/* constant shift costs */
1490   {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
1491    COSTS_N_INSNS (15),			/*				 HI */
1492    COSTS_N_INSNS (15),			/*				 SI */
1493    COSTS_N_INSNS (15),			/*				 DI */
1494    COSTS_N_INSNS (15)},			/*			      other */
1495   0,					/* cost of multiply per each bit set */
1496   {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
1497    COSTS_N_INSNS (56),			/*			    HI */
1498    COSTS_N_INSNS (56),			/*			    SI */
1499    COSTS_N_INSNS (56),			/*			    DI */
1500    COSTS_N_INSNS (56)},			/*			    other */
1501   COSTS_N_INSNS (1),			/* cost of movsx */
1502   COSTS_N_INSNS (1),			/* cost of movzx */
1503   16,					/* "large" insn */
1504   6,					/* MOVE_RATIO */
1505   2,				     /* cost for loading QImode using movzbl */
1506   {4, 5, 4},				/* cost of loading integer registers
1507 					   in QImode, HImode and SImode.
1508 					   Relative to reg-reg move (2).  */
1509   {2, 3, 2},				/* cost of storing integer registers */
1510   2,					/* cost of reg,reg fld/fst */
1511   {2, 2, 6},				/* cost of loading fp registers
1512 					   in SFmode, DFmode and XFmode */
1513   {4, 4, 6},				/* cost of storing fp registers
1514 					   in SFmode, DFmode and XFmode */
1515   2,					/* cost of moving MMX register */
1516   {2, 2},				/* cost of loading MMX registers
1517 					   in SImode and DImode */
1518   {2, 2},				/* cost of storing MMX registers
1519 					   in SImode and DImode */
1520   12,					/* cost of moving SSE register */
1521   {12, 12, 12},				/* cost of loading SSE registers
1522 					   in SImode, DImode and TImode */
1523   {2, 2, 8},				/* cost of storing SSE registers
1524 					   in SImode, DImode and TImode */
1525   10,					/* MMX or SSE register to integer */
1526   8,					/* size of l1 cache.  */
1527   256,					/* size of l2 cache.  */
1528   64,					/* size of prefetch block */
1529   6,					/* number of parallel prefetches */
1530   2,					/* Branch cost */
1531   COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
1532   COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
1533   COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
1534   COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
1535   COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
1536   COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
1537   pentium4_memcpy,
1538   pentium4_memset,
1539   1,					/* scalar_stmt_cost.  */
1540   1,					/* scalar_load_cost.  */
1541   1,					/* scalar_store_cost.  */
1542   1,					/* vec_stmt_cost.  */
1543   1,					/* vec_to_scalar_cost.  */
1544   1,					/* scalar_to_vec_cost.  */
1545   1,					/* vec_align_load_cost.  */
1546   2,					/* vec_unalign_load_cost.  */
1547   1,					/* vec_store_cost.  */
1548   3,					/* cond_taken_branch_cost.  */
1549   1,					/* cond_not_taken_branch_cost.  */
1550 };
1551 
1552 static stringop_algs nocona_memcpy[2] = {
1553   {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1554   {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1555              {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1556 
1557 static stringop_algs nocona_memset[2] = {
1558   {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1559              {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1560   {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1561              {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1562 
1563 static const
1564 struct processor_costs nocona_cost = {
1565   COSTS_N_INSNS (1),			/* cost of an add instruction */
1566   COSTS_N_INSNS (1),			/* cost of a lea instruction */
1567   COSTS_N_INSNS (1),			/* variable shift costs */
1568   COSTS_N_INSNS (1),			/* constant shift costs */
1569   {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
1570    COSTS_N_INSNS (10),			/*				 HI */
1571    COSTS_N_INSNS (10),			/*				 SI */
1572    COSTS_N_INSNS (10),			/*				 DI */
1573    COSTS_N_INSNS (10)},			/*			      other */
1574   0,					/* cost of multiply per each bit set */
1575   {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
1576    COSTS_N_INSNS (66),			/*			    HI */
1577    COSTS_N_INSNS (66),			/*			    SI */
1578    COSTS_N_INSNS (66),			/*			    DI */
1579    COSTS_N_INSNS (66)},			/*			    other */
1580   COSTS_N_INSNS (1),			/* cost of movsx */
1581   COSTS_N_INSNS (1),			/* cost of movzx */
1582   16,					/* "large" insn */
1583   17,					/* MOVE_RATIO */
1584   4,				     /* cost for loading QImode using movzbl */
1585   {4, 4, 4},				/* cost of loading integer registers
1586 					   in QImode, HImode and SImode.
1587 					   Relative to reg-reg move (2).  */
1588   {4, 4, 4},				/* cost of storing integer registers */
1589   3,					/* cost of reg,reg fld/fst */
1590   {12, 12, 12},				/* cost of loading fp registers
1591 					   in SFmode, DFmode and XFmode */
1592   {4, 4, 4},				/* cost of storing fp registers
1593 					   in SFmode, DFmode and XFmode */
1594   6,					/* cost of moving MMX register */
1595   {12, 12},				/* cost of loading MMX registers
1596 					   in SImode and DImode */
1597   {12, 12},				/* cost of storing MMX registers
1598 					   in SImode and DImode */
1599   6,					/* cost of moving SSE register */
1600   {12, 12, 12},				/* cost of loading SSE registers
1601 					   in SImode, DImode and TImode */
1602   {12, 12, 12},				/* cost of storing SSE registers
1603 					   in SImode, DImode and TImode */
1604   8,					/* MMX or SSE register to integer */
1605   8,					/* size of l1 cache.  */
1606   1024,					/* size of l2 cache.  */
1607   64,					/* size of prefetch block */
1608   8,					/* number of parallel prefetches */
1609   1,					/* Branch cost */
1610   COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
1611   COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
1612   COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
1613   COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
1614   COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
1615   COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
1616   nocona_memcpy,
1617   nocona_memset,
1618   1,					/* scalar_stmt_cost.  */
1619   1,					/* scalar_load_cost.  */
1620   1,					/* scalar_store_cost.  */
1621   1,					/* vec_stmt_cost.  */
1622   1,					/* vec_to_scalar_cost.  */
1623   1,					/* scalar_to_vec_cost.  */
1624   1,					/* vec_align_load_cost.  */
1625   2,					/* vec_unalign_load_cost.  */
1626   1,					/* vec_store_cost.  */
1627   3,					/* cond_taken_branch_cost.  */
1628   1,					/* cond_not_taken_branch_cost.  */
1629 };
1630 
1631 static stringop_algs atom_memcpy[2] = {
1632   {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1633   {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1634              {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1635 static stringop_algs atom_memset[2] = {
1636   {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1637              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1638   {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1639              {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1640 static const
1641 struct processor_costs atom_cost = {
1642   COSTS_N_INSNS (1),			/* cost of an add instruction */
1643   COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
1644   COSTS_N_INSNS (1),			/* variable shift costs */
1645   COSTS_N_INSNS (1),			/* constant shift costs */
1646   {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
1647    COSTS_N_INSNS (4),			/*				 HI */
1648    COSTS_N_INSNS (3),			/*				 SI */
1649    COSTS_N_INSNS (4),			/*				 DI */
1650    COSTS_N_INSNS (2)},			/*			      other */
1651   0,					/* cost of multiply per each bit set */
1652   {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
1653    COSTS_N_INSNS (26),			/*			    HI */
1654    COSTS_N_INSNS (42),			/*			    SI */
1655    COSTS_N_INSNS (74),			/*			    DI */
1656    COSTS_N_INSNS (74)},			/*			    other */
1657   COSTS_N_INSNS (1),			/* cost of movsx */
1658   COSTS_N_INSNS (1),			/* cost of movzx */
1659   8,					/* "large" insn */
1660   17,					/* MOVE_RATIO */
1661   4,					/* cost for loading QImode using movzbl */
1662   {4, 4, 4},				/* cost of loading integer registers
1663 					   in QImode, HImode and SImode.
1664 					   Relative to reg-reg move (2).  */
1665   {4, 4, 4},				/* cost of storing integer registers */
1666   4,					/* cost of reg,reg fld/fst */
1667   {12, 12, 12},				/* cost of loading fp registers
1668 					   in SFmode, DFmode and XFmode */
1669   {6, 6, 8},				/* cost of storing fp registers
1670 					   in SFmode, DFmode and XFmode */
1671   2,					/* cost of moving MMX register */
1672   {8, 8},				/* cost of loading MMX registers
1673 					   in SImode and DImode */
1674   {8, 8},				/* cost of storing MMX registers
1675 					   in SImode and DImode */
1676   2,					/* cost of moving SSE register */
1677   {8, 8, 8},				/* cost of loading SSE registers
1678 					   in SImode, DImode and TImode */
1679   {8, 8, 8},				/* cost of storing SSE registers
1680 					   in SImode, DImode and TImode */
1681   5,					/* MMX or SSE register to integer */
1682   32,					/* size of l1 cache.  */
1683   256,					/* size of l2 cache.  */
1684   64,					/* size of prefetch block */
1685   6,					/* number of parallel prefetches */
1686   3,					/* Branch cost */
1687   COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
1688   COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
1689   COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
1690   COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
1691   COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
1692   COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
1693   atom_memcpy,
1694   atom_memset,
1695   1,					/* scalar_stmt_cost.  */
1696   1,					/* scalar_load_cost.  */
1697   1,					/* scalar_store_cost.  */
1698   1,					/* vec_stmt_cost.  */
1699   1,					/* vec_to_scalar_cost.  */
1700   1,					/* scalar_to_vec_cost.  */
1701   1,					/* vec_align_load_cost.  */
1702   2,					/* vec_unalign_load_cost.  */
1703   1,					/* vec_store_cost.  */
1704   3,					/* cond_taken_branch_cost.  */
1705   1,					/* cond_not_taken_branch_cost.  */
1706 };
1707 
1708 static stringop_algs slm_memcpy[2] = {
1709   {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1710   {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1711              {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1712 static stringop_algs slm_memset[2] = {
1713   {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1714              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1715   {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1716              {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1717 static const
1718 struct processor_costs slm_cost = {
1719   COSTS_N_INSNS (1),			/* cost of an add instruction */
1720   COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
1721   COSTS_N_INSNS (1),			/* variable shift costs */
1722   COSTS_N_INSNS (1),			/* constant shift costs */
1723   {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
1724    COSTS_N_INSNS (3),			/*				 HI */
1725    COSTS_N_INSNS (3),			/*				 SI */
1726    COSTS_N_INSNS (4),			/*				 DI */
1727    COSTS_N_INSNS (2)},			/*			      other */
1728   0,					/* cost of multiply per each bit set */
1729   {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
1730    COSTS_N_INSNS (26),			/*			    HI */
1731    COSTS_N_INSNS (42),			/*			    SI */
1732    COSTS_N_INSNS (74),			/*			    DI */
1733    COSTS_N_INSNS (74)},			/*			    other */
1734   COSTS_N_INSNS (1),			/* cost of movsx */
1735   COSTS_N_INSNS (1),			/* cost of movzx */
1736   8,					/* "large" insn */
1737   17,					/* MOVE_RATIO */
1738   4,					/* cost for loading QImode using movzbl */
1739   {4, 4, 4},				/* cost of loading integer registers
1740 					   in QImode, HImode and SImode.
1741 					   Relative to reg-reg move (2).  */
1742   {4, 4, 4},				/* cost of storing integer registers */
1743   4,					/* cost of reg,reg fld/fst */
1744   {12, 12, 12},				/* cost of loading fp registers
1745 					   in SFmode, DFmode and XFmode */
1746   {6, 6, 8},				/* cost of storing fp registers
1747 					   in SFmode, DFmode and XFmode */
1748   2,					/* cost of moving MMX register */
1749   {8, 8},				/* cost of loading MMX registers
1750 					   in SImode and DImode */
1751   {8, 8},				/* cost of storing MMX registers
1752 					   in SImode and DImode */
1753   2,					/* cost of moving SSE register */
1754   {8, 8, 8},				/* cost of loading SSE registers
1755 					   in SImode, DImode and TImode */
1756   {8, 8, 8},				/* cost of storing SSE registers
1757 					   in SImode, DImode and TImode */
1758   5,					/* MMX or SSE register to integer */
1759   32,					/* size of l1 cache.  */
1760   256,					/* size of l2 cache.  */
1761   64,					/* size of prefetch block */
1762   6,					/* number of parallel prefetches */
1763   3,					/* Branch cost */
1764   COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
1765   COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
1766   COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
1767   COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
1768   COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
1769   COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
1770   slm_memcpy,
1771   slm_memset,
1772   1,					/* scalar_stmt_cost.  */
1773   1,					/* scalar_load_cost.  */
1774   1,					/* scalar_store_cost.  */
1775   1,					/* vec_stmt_cost.  */
1776   4,					/* vec_to_scalar_cost.  */
1777   1,					/* scalar_to_vec_cost.  */
1778   1,					/* vec_align_load_cost.  */
1779   2,					/* vec_unalign_load_cost.  */
1780   1,					/* vec_store_cost.  */
1781   3,					/* cond_taken_branch_cost.  */
1782   1,					/* cond_not_taken_branch_cost.  */
1783 };
1784 
1785 static stringop_algs intel_memcpy[2] = {
1786   {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1787   {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1788              {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1789 static stringop_algs intel_memset[2] = {
1790   {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1791              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1792   {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1793              {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1794 static const
1795 struct processor_costs intel_cost = {
1796   COSTS_N_INSNS (1),			/* cost of an add instruction */
1797   COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
1798   COSTS_N_INSNS (1),			/* variable shift costs */
1799   COSTS_N_INSNS (1),			/* constant shift costs */
1800   {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
1801    COSTS_N_INSNS (3),			/*				 HI */
1802    COSTS_N_INSNS (3),			/*				 SI */
1803    COSTS_N_INSNS (4),			/*				 DI */
1804    COSTS_N_INSNS (2)},			/*			      other */
1805   0,					/* cost of multiply per each bit set */
1806   {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
1807    COSTS_N_INSNS (26),			/*			    HI */
1808    COSTS_N_INSNS (42),			/*			    SI */
1809    COSTS_N_INSNS (74),			/*			    DI */
1810    COSTS_N_INSNS (74)},			/*			    other */
1811   COSTS_N_INSNS (1),			/* cost of movsx */
1812   COSTS_N_INSNS (1),			/* cost of movzx */
1813   8,					/* "large" insn */
1814   17,					/* MOVE_RATIO */
1815   4,					/* cost for loading QImode using movzbl */
1816   {4, 4, 4},				/* cost of loading integer registers
1817 					   in QImode, HImode and SImode.
1818 					   Relative to reg-reg move (2).  */
1819   {4, 4, 4},				/* cost of storing integer registers */
1820   4,					/* cost of reg,reg fld/fst */
1821   {12, 12, 12},				/* cost of loading fp registers
1822 					   in SFmode, DFmode and XFmode */
1823   {6, 6, 8},				/* cost of storing fp registers
1824 					   in SFmode, DFmode and XFmode */
1825   2,					/* cost of moving MMX register */
1826   {8, 8},				/* cost of loading MMX registers
1827 					   in SImode and DImode */
1828   {8, 8},				/* cost of storing MMX registers
1829 					   in SImode and DImode */
1830   2,					/* cost of moving SSE register */
1831   {8, 8, 8},				/* cost of loading SSE registers
1832 					   in SImode, DImode and TImode */
1833   {8, 8, 8},				/* cost of storing SSE registers
1834 					   in SImode, DImode and TImode */
1835   5,					/* MMX or SSE register to integer */
1836   32,					/* size of l1 cache.  */
1837   256,					/* size of l2 cache.  */
1838   64,					/* size of prefetch block */
1839   6,					/* number of parallel prefetches */
1840   3,					/* Branch cost */
1841   COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
1842   COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
1843   COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
1844   COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
1845   COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
1846   COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
1847   intel_memcpy,
1848   intel_memset,
1849   1,					/* scalar_stmt_cost.  */
1850   1,					/* scalar_load_cost.  */
1851   1,					/* scalar_store_cost.  */
1852   1,					/* vec_stmt_cost.  */
1853   4,					/* vec_to_scalar_cost.  */
1854   1,					/* scalar_to_vec_cost.  */
1855   1,					/* vec_align_load_cost.  */
1856   2,					/* vec_unalign_load_cost.  */
1857   1,					/* vec_store_cost.  */
1858   3,					/* cond_taken_branch_cost.  */
1859   1,					/* cond_not_taken_branch_cost.  */
1860 };
1861 
1862 /* Generic should produce code tuned for Core i7 (and newer) and
1863    btver1 (and newer) chips.  */
1864 
1865 static stringop_algs generic_memcpy[2] = {
1866   {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1867              {-1, libcall, false}}},
1868   {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1869              {-1, libcall, false}}}};
1870 static stringop_algs generic_memset[2] = {
1871   {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1872              {-1, libcall, false}}},
1873   {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1874              {-1, libcall, false}}}};
1875 static const
1876 struct processor_costs generic_cost = {
1877   COSTS_N_INSNS (1),			/* cost of an add instruction */
1878   /* On all chips taken into consideration, lea is 2 cycles or more.  With
1879      this cost, however, our current implementation of synth_mult results in
1880      the use of unnecessary temporary registers, causing regressions on
1881      several SPECfp benchmarks.  */
1882   COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
1883   COSTS_N_INSNS (1),			/* variable shift costs */
1884   COSTS_N_INSNS (1),			/* constant shift costs */
1885   {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
1886    COSTS_N_INSNS (4),			/*				 HI */
1887    COSTS_N_INSNS (3),			/*				 SI */
1888    COSTS_N_INSNS (4),			/*				 DI */
1889    COSTS_N_INSNS (2)},			/*			      other */
1890   0,					/* cost of multiply per each bit set */
1891   {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
1892    COSTS_N_INSNS (26),			/*			    HI */
1893    COSTS_N_INSNS (42),			/*			    SI */
1894    COSTS_N_INSNS (74),			/*			    DI */
1895    COSTS_N_INSNS (74)},			/*			    other */
1896   COSTS_N_INSNS (1),			/* cost of movsx */
1897   COSTS_N_INSNS (1),			/* cost of movzx */
1898   8,					/* "large" insn */
1899   17,					/* MOVE_RATIO */
1900   4,				     /* cost for loading QImode using movzbl */
1901   {4, 4, 4},				/* cost of loading integer registers
1902 					   in QImode, HImode and SImode.
1903 					   Relative to reg-reg move (2).  */
1904   {4, 4, 4},				/* cost of storing integer registers */
1905   4,					/* cost of reg,reg fld/fst */
1906   {12, 12, 12},				/* cost of loading fp registers
1907 					   in SFmode, DFmode and XFmode */
1908   {6, 6, 8},				/* cost of storing fp registers
1909 					   in SFmode, DFmode and XFmode */
1910   2,					/* cost of moving MMX register */
1911   {8, 8},				/* cost of loading MMX registers
1912 					   in SImode and DImode */
1913   {8, 8},				/* cost of storing MMX registers
1914 					   in SImode and DImode */
1915   2,					/* cost of moving SSE register */
1916   {8, 8, 8},				/* cost of loading SSE registers
1917 					   in SImode, DImode and TImode */
1918   {8, 8, 8},				/* cost of storing SSE registers
1919 					   in SImode, DImode and TImode */
1920   5,					/* MMX or SSE register to integer */
1921   32,					/* size of l1 cache.  */
1922   512,					/* size of l2 cache.  */
1923   64,					/* size of prefetch block */
1924   6,					/* number of parallel prefetches */
1925   /* Benchmarks show large regressions on the K8 sixtrack benchmark when
1926      this value is increased to the perhaps more appropriate value of 5.  */
1927   3,					/* Branch cost */
1928   COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
1929   COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
1930   COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
1931   COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
1932   COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
1933   COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
1934   generic_memcpy,
1935   generic_memset,
1936   1,					/* scalar_stmt_cost.  */
1937   1,					/* scalar_load_cost.  */
1938   1,					/* scalar_store_cost.  */
1939   1,					/* vec_stmt_cost.  */
1940   1,					/* vec_to_scalar_cost.  */
1941   1,					/* scalar_to_vec_cost.  */
1942   1,					/* vec_align_load_cost.  */
1943   2,					/* vec_unalign_load_cost.  */
1944   1,					/* vec_store_cost.  */
1945   3,					/* cond_taken_branch_cost.  */
1946   1,					/* cond_not_taken_branch_cost.  */
1947 };
1948 
1949 /* core_cost should produce code tuned for the Core family of CPUs.  */
1950 static stringop_algs core_memcpy[2] = {
1951   {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1952   {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1953              {-1, libcall, false}}}};
1954 static stringop_algs core_memset[2] = {
1955   {libcall, {{6, loop_1_byte, true},
1956              {24, loop, true},
1957              {8192, rep_prefix_4_byte, true},
1958              {-1, libcall, false}}},
1959   {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1960              {-1, libcall, false}}}};
1961 
1962 static const
1963 struct processor_costs core_cost = {
1964   COSTS_N_INSNS (1),			/* cost of an add instruction */
1965   /* On all chips taken into consideration, lea is 2 cycles or more.  With
1966      this cost, however, our current implementation of synth_mult results in
1967      the use of unnecessary temporary registers, causing regressions on
1968      several SPECfp benchmarks.  */
1969   COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
1970   COSTS_N_INSNS (1),			/* variable shift costs */
1971   COSTS_N_INSNS (1),			/* constant shift costs */
1972   {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
1973    COSTS_N_INSNS (4),			/*				 HI */
1974    COSTS_N_INSNS (3),			/*				 SI */
1975    COSTS_N_INSNS (4),			/*				 DI */
1976    COSTS_N_INSNS (2)},			/*			      other */
1977   0,					/* cost of multiply per each bit set */
1978   {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
1979    COSTS_N_INSNS (26),			/*			    HI */
1980    COSTS_N_INSNS (42),			/*			    SI */
1981    COSTS_N_INSNS (74),			/*			    DI */
1982    COSTS_N_INSNS (74)},			/*			    other */
1983   COSTS_N_INSNS (1),			/* cost of movsx */
1984   COSTS_N_INSNS (1),			/* cost of movzx */
1985   8,					/* "large" insn */
1986   17,					/* MOVE_RATIO */
1987   4,				     /* cost for loading QImode using movzbl */
1988   {4, 4, 4},				/* cost of loading integer registers
1989 					   in QImode, HImode and SImode.
1990 					   Relative to reg-reg move (2).  */
1991   {4, 4, 4},				/* cost of storing integer registers */
1992   4,					/* cost of reg,reg fld/fst */
1993   {12, 12, 12},				/* cost of loading fp registers
1994 					   in SFmode, DFmode and XFmode */
1995   {6, 6, 8},				/* cost of storing fp registers
1996 					   in SFmode, DFmode and XFmode */
1997   2,					/* cost of moving MMX register */
1998   {8, 8},				/* cost of loading MMX registers
1999 					   in SImode and DImode */
2000   {8, 8},				/* cost of storing MMX registers
2001 					   in SImode and DImode */
2002   2,					/* cost of moving SSE register */
2003   {8, 8, 8},				/* cost of loading SSE registers
2004 					   in SImode, DImode and TImode */
2005   {8, 8, 8},				/* cost of storing SSE registers
2006 					   in SImode, DImode and TImode */
2007   5,					/* MMX or SSE register to integer */
2008   64,					/* size of l1 cache.  */
2009   512,					/* size of l2 cache.  */
2010   64,					/* size of prefetch block */
2011   6,					/* number of parallel prefetches */
2012   /* FIXME perhaps more appropriate value is 5.  */
2013   3,					/* Branch cost */
2014   COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
2015   COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
2016   COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
2017   COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
2018   COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
2019   COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
2020   core_memcpy,
2021   core_memset,
2022   1,					/* scalar_stmt_cost.  */
2023   1,					/* scalar_load_cost.  */
2024   1,					/* scalar_store_cost.  */
2025   1,					/* vec_stmt_cost.  */
2026   1,					/* vec_to_scalar_cost.  */
2027   1,					/* scalar_to_vec_cost.  */
2028   1,					/* vec_align_load_cost.  */
2029   2,					/* vec_unalign_load_cost.  */
2030   1,					/* vec_store_cost.  */
2031   3,					/* cond_taken_branch_cost.  */
2032   1,					/* cond_not_taken_branch_cost.  */
2033 };
2034 
2035 
2036 /* Set by -mtune.  */
2037 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2038 
2039 /* Set by -mtune or -Os.  */
2040 const struct processor_costs *ix86_cost = &pentium_cost;
2041 
2042 /* Processor feature/optimization bitmasks.  */
2043 #define m_386 (1<<PROCESSOR_I386)
2044 #define m_486 (1<<PROCESSOR_I486)
2045 #define m_PENT (1<<PROCESSOR_PENTIUM)
2046 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2047 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2048 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2049 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2050 #define m_CORE2 (1<<PROCESSOR_CORE2)
2051 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2052 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2053 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2054 #define m_CORE_ALL (m_CORE2 | m_NEHALEM  | m_SANDYBRIDGE | m_HASWELL)
2055 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2056 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2057 #define m_KNL (1<<PROCESSOR_KNL)
2058 #define m_INTEL (1<<PROCESSOR_INTEL)
2059 
2060 #define m_GEODE (1<<PROCESSOR_GEODE)
2061 #define m_K6 (1<<PROCESSOR_K6)
2062 #define m_K6_GEODE (m_K6 | m_GEODE)
2063 #define m_K8 (1<<PROCESSOR_K8)
2064 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2065 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2066 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2067 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2068 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2069 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2070 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2071 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2072 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2073 #define m_BDVER	(m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2074 #define m_BTVER (m_BTVER1 | m_BTVER2)
2075 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2076 
2077 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2078 
2079 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2080 #undef DEF_TUNE
2081 #define DEF_TUNE(tune, name, selector) name,
2082 #include "x86-tune.def"
2083 #undef DEF_TUNE
2084 };
2085 
2086 /* Feature tests against the various tunings.  */
2087 unsigned char ix86_tune_features[X86_TUNE_LAST];
2088 
2089 /* Feature tests against the various tunings, used to create
2090    ix86_tune_features based on the processor mask.  */
2091 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2092 #undef DEF_TUNE
2093 #define DEF_TUNE(tune, name, selector) selector,
2094 #include "x86-tune.def"
2095 #undef DEF_TUNE
2096 };
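
/* The two tables above are built with the classic X-macro pattern:
   x86-tune.def is included twice, with DEF_TUNE expanding first to each
   feature's name and then to its processor-mask selector.  An
   illustrative entry (shape only; see x86-tune.def for the real list)
   looks like

     DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
	       m_PENT | m_PPRO | m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC)

   contributing "schedule" to ix86_tune_feature_names and the mask
   expression to initial_ix86_tune_features.  */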
2097 
2098 /* Feature tests against the various architecture variations.  */
2099 unsigned char ix86_arch_features[X86_ARCH_LAST];
2100 
2101 /* Feature tests against the various architecture variations, used to create
2102    ix86_arch_features based on the processor mask.  */
2103 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2104   /* X86_ARCH_CMOV: Conditional move was added for pentiumpro.  */
2105   ~(m_386 | m_486 | m_PENT | m_K6),
2106 
2107   /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
2108   ~m_386,
2109 
2110   /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2111   ~(m_386 | m_486),
2112 
2113   /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
2114   ~m_386,
2115 
2116   /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
2117   ~m_386,
2118 };
2119 
2120 /* If the average insn count for a single function invocation is
2121    lower than this constant, emit fast (but longer) prologue and
2122    epilogue code.  */
2123 #define FAST_PROLOGUE_INSN_COUNT 20
2124 
2125 /* Names for the 8-bit low, 8-bit high, and 16-bit registers, respectively.  */
2126 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2127 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2128 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2129 
2130 /* Array of the smallest class containing reg number REGNO, indexed by
2131    REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
2132 
2133 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2134 {
2135   /* ax, dx, cx, bx */
2136   AREG, DREG, CREG, BREG,
2137   /* si, di, bp, sp */
2138   SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2139   /* FP registers */
2140   FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2141   FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2142   /* arg pointer */
2143   NON_Q_REGS,
2144   /* flags, fpsr, fpcr, frame */
2145   NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2146   /* SSE registers */
2147   SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2148   SSE_REGS, SSE_REGS,
2149   /* MMX registers */
2150   MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2151   MMX_REGS, MMX_REGS,
2152   /* REX registers */
2153   NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2154   NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2155   /* SSE REX registers */
2156   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2157   SSE_REGS, SSE_REGS,
2158   /* AVX-512 SSE registers */
2159   EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2160   EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2161   EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2162   EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2163   /* Mask registers.  */
2164   MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2165   MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2166   /* MPX bound registers */
2167   BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2168 };
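
/* Usage sketch: i386.h defines REGNO_REG_CLASS (REGNO) simply as
   regclass_map[REGNO], so e.g. REGNO_REG_CLASS (AX_REG) is AREG and
   REGNO_REG_CLASS (SP_REG) is NON_Q_REGS, matching the entries above.  */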
2169 
2170 /* The "default" register map used in 32bit mode.  */
2171 
2172 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2173 {
2174   0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
2175   12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
2176   -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
2177   21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
2178   29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
2179   -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
2180   -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
2181   -1, -1, -1, -1, -1, -1, -1, -1,       /* AVX-512 registers 16-23 */
2182   -1, -1, -1, -1, -1, -1, -1, -1,       /* AVX-512 registers 24-31 */
2183   93, 94, 95, 96, 97, 98, 99, 100,      /* Mask registers */
2184   101, 102, 103, 104,			/* bound registers */
2185 };
2186 
2187 /* The "default" register map used in 64bit mode.  */
2188 
2189 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2190 {
2191   0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
2192   33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
2193   -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
2194   17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
2195   41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
2196   8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
2197   25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
2198   67, 68, 69, 70, 71, 72, 73, 74,       /* AVX-512 registers 16-23 */
2199   75, 76, 77, 78, 79, 80, 81, 82,       /* AVX-512 registers 24-31 */
2200   118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2201   126, 127, 128, 129,			/* bound registers */
2202 };
2203 
2204 /* Define the register numbers to be used in Dwarf debugging information.
2205    The SVR4 reference port C compiler uses the following register numbers
2206    in its Dwarf output code:
2207 	0 for %eax (gcc regno = 0)
2208 	1 for %ecx (gcc regno = 2)
2209 	2 for %edx (gcc regno = 1)
2210 	3 for %ebx (gcc regno = 3)
2211 	4 for %esp (gcc regno = 7)
2212 	5 for %ebp (gcc regno = 6)
2213 	6 for %esi (gcc regno = 4)
2214 	7 for %edi (gcc regno = 5)
2215    The following three DWARF register numbers are never generated by
2216    the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2217    believes these numbers have these meanings.
2218 	8  for %eip    (no gcc equivalent)
2219 	9  for %eflags (gcc regno = 17)
2220 	10 for %trapno (no gcc equivalent)
2221    It is not at all clear how we should number the FP stack registers
2222    for the x86 architecture.  If the version of SDB on x86/svr4 were
2223    a bit less brain dead with respect to floating-point then we would
2224    have a precedent to follow with respect to DWARF register numbers
2225    for x86 FP registers, but the SDB on x86/svr4 is so completely
2226    broken with respect to FP registers that it is hardly worth thinking
2227    of it as something to strive for compatibility with.
2228    The version of x86/svr4 SDB I have at the moment does (partially)
2229    seem to believe that DWARF register number 11 is associated with
2230    the x86 register %st(0), but that's about all.  Higher DWARF
2231    register numbers don't seem to be associated with anything in
2232    particular, and even for DWARF regno 11, SDB only seems to under-
2233    stand that it should say that a variable lives in %st(0) (when
2234    asked via an `=' command) if we said it was in DWARF regno 11,
2235    but SDB still prints garbage when asked for the value of the
2236    variable in question (via a `/' command).
2237    (Also note that the labels SDB prints for various FP stack regs
2238    when doing an `x' command are all wrong.)
2239    Note that these problems generally don't affect the native SVR4
2240    C compiler because it doesn't allow the use of -O with -g and
2241    because when it is *not* optimizing, it allocates a memory
2242    location for each floating-point variable, and the memory
2243    location is what gets described in the DWARF AT_location
2244    attribute for the variable in question.
2245    Regardless of the severe mental illness of the x86/svr4 SDB, we
2246    do something sensible here and we use the following DWARF
2247    register numbers.  Note that these are all stack-top-relative
2248    numbers.
2249 	11 for %st(0) (gcc regno = 8)
2250 	12 for %st(1) (gcc regno = 9)
2251 	13 for %st(2) (gcc regno = 10)
2252 	14 for %st(3) (gcc regno = 11)
2253 	15 for %st(4) (gcc regno = 12)
2254 	16 for %st(5) (gcc regno = 13)
2255 	17 for %st(6) (gcc regno = 14)
2256 	18 for %st(7) (gcc regno = 15)
2257 */
2258 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2259 {
2260   0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
2261   11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
2262   -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
2263   21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
2264   29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
2265   -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
2266   -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
2267   -1, -1, -1, -1, -1, -1, -1, -1,       /* AVX-512 registers 16-23 */
2268   -1, -1, -1, -1, -1, -1, -1, -1,       /* AVX-512 registers 24-31 */
2269   93, 94, 95, 96, 97, 98, 99, 100,      /* Mask registers */
2270   101, 102, 103, 104,			/* bound registers */
2271 };
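
/* Which of the three maps above applies is decided by the target
   headers (a hedged summary; see DBX_REGISTER_NUMBER in i386.h and the
   OS-specific headers): broadly, 64-bit code numbers its registers
   through dbx64_register_map, while 32-bit SVR4-derived configurations
   use svr4_dbx_register_map when emitting DWARF.  */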
2272 
2273 /* Define parameter passing and return registers.  */
2274 
2275 static int const x86_64_int_parameter_registers[6] =
2276 {
2277   DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2278 };
2279 
2280 static int const x86_64_ms_abi_int_parameter_registers[4] =
2281 {
2282   CX_REG, DX_REG, R8_REG, R9_REG
2283 };
2284 
2285 static int const x86_64_int_return_registers[4] =
2286 {
2287   AX_REG, DX_REG, DI_REG, SI_REG
2288 };
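
/* For illustration (standard psABI behaviour, not specific to this
   file): given

     long f (long a, long b, long c, long d, long e, long g);

   a SysV x86-64 call passes a..g in %rdi, %rsi, %rdx, %rcx, %r8 and %r9,
   in the order of x86_64_int_parameter_registers above, while the MS ABI
   uses only %rcx, %rdx, %r8 and %r9 and passes the rest on the stack.
   Integer results come back in %rax (plus %rdx for 128-bit values), the
   leading entries of x86_64_int_return_registers.  */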
2289 
2290 /* Additional registers that are clobbered by SYSV calls.  */
2291 
2292 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2293 {
2294   SI_REG, DI_REG,
2295   XMM6_REG, XMM7_REG,
2296   XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2297   XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2298 };
2299 
2300 /* Define the structure for the machine field in struct function.  */
2301 
2302 struct GTY(()) stack_local_entry {
2303   unsigned short mode;
2304   unsigned short n;
2305   rtx rtl;
2306   struct stack_local_entry *next;
2307 };
2308 
2309 /* Structure describing stack frame layout.
2310    Stack grows downward:
2311 
2312    [arguments]
2313 					<- ARG_POINTER
2314    saved pc
2315 
2316    saved static chain			if ix86_static_chain_on_stack
2317 
2318    saved frame pointer			if frame_pointer_needed
2319 					<- HARD_FRAME_POINTER
2320    [saved regs]
2321 					<- regs_save_offset
2322    [padding0]
2323 
2324    [saved SSE regs]
2325 					<- sse_regs_save_offset
2326    [padding1]          |
2327 		       |		<- FRAME_POINTER
2328    [va_arg registers]  |
2329 		       |
2330    [frame]	       |
2331 		       |
2332    [padding2]	       | = to_allocate
2333 					<- STACK_POINTER
2334   */
2335 struct ix86_frame
2336 {
2337   int nsseregs;
2338   int nregs;
2339   int va_arg_size;
2340   int red_zone_size;
2341   int outgoing_arguments_size;
2342 
2343   /* The offsets relative to ARG_POINTER.  */
2344   HOST_WIDE_INT frame_pointer_offset;
2345   HOST_WIDE_INT hard_frame_pointer_offset;
2346   HOST_WIDE_INT stack_pointer_offset;
2347   HOST_WIDE_INT hfp_save_offset;
2348   HOST_WIDE_INT reg_save_offset;
2349   HOST_WIDE_INT sse_reg_save_offset;
2350 
2351   /* When save_regs_using_mov is set, emit prologue using
2352      move instead of push instructions.  */
2353   bool save_regs_using_mov;
2354 };
2355 
2356 /* Which cpu are we scheduling for.  */
2357 enum attr_cpu ix86_schedule;
2358 
2359 /* Which cpu are we optimizing for.  */
2360 enum processor_type ix86_tune;
2361 
2362 /* Which instruction set architecture to use.  */
2363 enum processor_type ix86_arch;
2364 
2365 /* True if processor has SSE prefetch instruction.  */
2366 unsigned char x86_prefetch_sse;
2367 
2368 /* -mstackrealign option */
2369 static const char ix86_force_align_arg_pointer_string[]
2370   = "force_align_arg_pointer";
2371 
2372 static rtx (*ix86_gen_leave) (void);
2373 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2374 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2375 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2376 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2377 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2378 static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
2379 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2380 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2381 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2382 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2383 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2384 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
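
/* These hooks are filled in by ix86_option_override_internal according
   to the word size, roughly (a sketch; the gen_* functions are the
   expanders generated from i386.md):

     if (TARGET_64BIT)
       ix86_gen_add3 = gen_adddi3;
     else
       ix86_gen_add3 = gen_addsi3;

   so the rest of the backend can emit word-sized operations without
   testing TARGET_64BIT at every call site.  */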
2385 
2386 /* Preferred alignment for stack boundary in bits.  */
2387 unsigned int ix86_preferred_stack_boundary;
2388 
2389 /* Alignment for incoming stack boundary in bits specified at
2390    command line.  */
2391 static unsigned int ix86_user_incoming_stack_boundary;
2392 
2393 /* Default alignment for incoming stack boundary in bits.  */
2394 static unsigned int ix86_default_incoming_stack_boundary;
2395 
2396 /* Alignment for incoming stack boundary in bits.  */
2397 unsigned int ix86_incoming_stack_boundary;
2398 
2399 /* Calling-ABI-specific va_list type nodes.  */
2400 static GTY(()) tree sysv_va_list_type_node;
2401 static GTY(()) tree ms_va_list_type_node;
2402 
2403 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
2404 char internal_label_prefix[16];
2405 int internal_label_prefix_len;
2406 
2407 /* Fence to use after a loop that uses movnt.  */
2408 tree x86_mfence;
2409 
2410 /* Register class used for passing a given 64-bit part of the argument.
2411    These represent classes as documented by the psABI, except that SSESF
2412    and SSEDF are basically the SSE class: gcc just uses an SFmode or
2413    DFmode move instead of DImode to avoid reformatting penalties.
2414 
2415    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode
2416    moves whenever possible (the upper half then contains only padding).  */
2417 enum x86_64_reg_class
2418   {
2419     X86_64_NO_CLASS,
2420     X86_64_INTEGER_CLASS,
2421     X86_64_INTEGERSI_CLASS,
2422     X86_64_SSE_CLASS,
2423     X86_64_SSESF_CLASS,
2424     X86_64_SSEDF_CLASS,
2425     X86_64_SSEUP_CLASS,
2426     X86_64_X87_CLASS,
2427     X86_64_X87UP_CLASS,
2428     X86_64_COMPLEX_X87_CLASS,
2429     X86_64_MEMORY_CLASS
2430   };
2431 
2432 #define MAX_CLASSES 8
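
/* A hedged example of how the classes are used (behaviour per the
   x86-64 psABI; the classification itself is done by classify_argument
   later in this file): for

     struct s { double d; long l; };

   the first eightbyte is classified as SSE (internally X86_64_SSEDF_CLASS,
   so a DFmode move can be used) and the second as X86_64_INTEGER_CLASS,
   so the struct travels in one XMM register plus one general-purpose
   register.  */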
2433 
2434 /* Table of constants used by fldpi, fldln2, etc....  */
2435 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2436 static bool ext_80387_constants_init = 0;
2437 
2438 
2439 static struct machine_function * ix86_init_machine_status (void);
2440 static rtx ix86_function_value (const_tree, const_tree, bool);
2441 static bool ix86_function_value_regno_p (const unsigned int);
2442 static unsigned int ix86_function_arg_boundary (machine_mode,
2443 						const_tree);
2444 static rtx ix86_static_chain (const_tree, bool);
2445 static int ix86_function_regparm (const_tree, const_tree);
2446 static void ix86_compute_frame_layout (struct ix86_frame *);
2447 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2448 						 rtx, rtx, int);
2449 static void ix86_add_new_builtins (HOST_WIDE_INT);
2450 static tree ix86_canonical_va_list_type (tree);
2451 static void predict_jump (int);
2452 static unsigned int split_stack_prologue_scratch_regno (void);
2453 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2454 
2455 enum ix86_function_specific_strings
2456 {
2457   IX86_FUNCTION_SPECIFIC_ARCH,
2458   IX86_FUNCTION_SPECIFIC_TUNE,
2459   IX86_FUNCTION_SPECIFIC_MAX
2460 };
2461 
2462 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2463 				 const char *, enum fpmath_unit, bool);
2464 static void ix86_function_specific_save (struct cl_target_option *,
2465 					 struct gcc_options *opts);
2466 static void ix86_function_specific_restore (struct gcc_options *opts,
2467 					    struct cl_target_option *);
2468 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2469 static void ix86_function_specific_print (FILE *, int,
2470 					  struct cl_target_option *);
2471 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2472 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2473 						 struct gcc_options *,
2474 						 struct gcc_options *,
2475 						 struct gcc_options *);
2476 static bool ix86_can_inline_p (tree, tree);
2477 static void ix86_set_current_function (tree);
2478 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2479 
2480 static enum calling_abi ix86_function_abi (const_tree);
2481 
2482 
2483 #ifndef SUBTARGET32_DEFAULT_CPU
2484 #define SUBTARGET32_DEFAULT_CPU "i386"
2485 #endif
2486 
2487 /* Whether -mtune= or -march= were specified.  */
2488 static int ix86_tune_defaulted;
2489 static int ix86_arch_specified;
2490 
2491 /* Vectorization library interface and handlers.  */
2492 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2493 
2494 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2495 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2496 
2497 /* Processor target table, indexed by processor number */
2498 struct ptt
2499 {
2500   const char *const name;			/* processor name  */
2501   const struct processor_costs *cost;		/* Processor costs */
2502   const int align_loop;				/* Default alignments.  */
2503   const int align_loop_max_skip;
2504   const int align_jump;
2505   const int align_jump_max_skip;
2506   const int align_func;
2507 };
2508 
2509 /* This table must be in sync with enum processor_type in i386.h.  */
2510 static const struct ptt processor_target_table[PROCESSOR_max] =
2511 {
2512   {"generic", &generic_cost, 16, 10, 16, 10, 16},
2513   {"i386", &i386_cost, 4, 3, 4, 3, 4},
2514   {"i486", &i486_cost, 16, 15, 16, 15, 16},
2515   {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2516   {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2517   {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2518   {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2519   {"core2", &core_cost, 16, 10, 16, 10, 16},
2520   {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2521   {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2522   {"haswell", &core_cost, 16, 10, 16, 10, 16},
2523   {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2524   {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2525   {"knl", &slm_cost, 16, 15, 16, 7, 16},
2526   {"intel", &intel_cost, 16, 15, 16, 7, 16},
2527   {"geode", &geode_cost, 0, 0, 0, 0, 0},
2528   {"k6", &k6_cost, 32, 7, 32, 7, 32},
2529   {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2530   {"k8", &k8_cost, 16, 7, 16, 7, 16},
2531   {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2532   {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2533   {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2534   {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2535   {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2536   {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2537   {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2538 };
2539 
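/* Worker for the vzeroupper insertion pass.  VZEROUPPER zeroes the
   upper halves of all YMM registers; emitting it before transitions
   from 256-bit AVX code to legacy SSE code avoids the costly AVX/SSE
   transition penalty on some processors.  */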
2540 static unsigned int
2541 rest_of_handle_insert_vzeroupper (void)
2542 {
2543   int i;
2544 
  /* vzeroupper instructions are inserted immediately after reload to
     account for possible spills from 256-bit registers.  The pass
     reuses the mode switching infrastructure by re-running the mode
     insertion pass, so disable entities that have already been
     processed.  */
2549   for (i = 0; i < MAX_386_ENTITIES; i++)
2550     ix86_optimize_mode_switching[i] = 0;
2551 
2552   ix86_optimize_mode_switching[AVX_U128] = 1;
2553 
2554   /* Call optimize_mode_switching.  */
2555   g->get_passes ()->execute_pass_mode_switching ();
2556   return 0;
2557 }
2558 
2559 namespace {
2560 
2561 const pass_data pass_data_insert_vzeroupper =
2562 {
2563   RTL_PASS, /* type */
2564   "vzeroupper", /* name */
2565   OPTGROUP_NONE, /* optinfo_flags */
2566   TV_NONE, /* tv_id */
2567   0, /* properties_required */
2568   0, /* properties_provided */
2569   0, /* properties_destroyed */
2570   0, /* todo_flags_start */
2571   TODO_df_finish, /* todo_flags_finish */
2572 };
2573 
2574 class pass_insert_vzeroupper : public rtl_opt_pass
2575 {
2576 public:
2577   pass_insert_vzeroupper(gcc::context *ctxt)
2578     : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2579   {}
2580 
2581   /* opt_pass methods: */
2582   virtual bool gate (function *)
2583     {
2584       return TARGET_AVX && !TARGET_AVX512F
2585 	     && TARGET_VZEROUPPER && flag_expensive_optimizations
2586 	     && !optimize_size;
2587     }
2588 
2589   virtual unsigned int execute (function *)
2590     {
2591       return rest_of_handle_insert_vzeroupper ();
2592     }
2593 
2594 }; // class pass_insert_vzeroupper
2595 
2596 } // anon namespace
2597 
2598 rtl_opt_pass *
2599 make_pass_insert_vzeroupper (gcc::context *ctxt)
2600 {
2601   return new pass_insert_vzeroupper (ctxt);
2602 }
2603 
/* Return true if a red zone is in use.  The 64-bit MS ABI, unlike the
   SysV ABI, does not define a red zone.  */
2605 
2606 static inline bool
2607 ix86_using_red_zone (void)
2608 {
2609   return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2610 }
2611 
2612 /* Return a string that documents the current -m options.  The caller is
2613    responsible for freeing the string.  */
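/* For example, a 64-bit compile might produce something like
   "-march=haswell -mtune=haswell -m64 -msse4.2 -msse4.1 -mfpmath=sse";
   every ISA bit that is set gets its own entry in the output.  */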
2614 
2615 static char *
2616 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2617 		    const char *tune, enum fpmath_unit fpmath,
2618 		    bool add_nl_p)
2619 {
2620   struct ix86_target_opts
2621   {
2622     const char *option;		/* option string */
2623     HOST_WIDE_INT mask;		/* isa mask options */
2624   };
2625 
  /* This table is ordered so that options like -msse4.2 that imply
     preceding options are matched (and printed) first.  */
2628   static struct ix86_target_opts isa_opts[] =
2629   {
2630     { "-mfma4",		OPTION_MASK_ISA_FMA4 },
2631     { "-mfma",		OPTION_MASK_ISA_FMA },
2632     { "-mxop",		OPTION_MASK_ISA_XOP },
2633     { "-mlwp",		OPTION_MASK_ISA_LWP },
2634     { "-mavx512f",	OPTION_MASK_ISA_AVX512F },
2635     { "-mavx512er",	OPTION_MASK_ISA_AVX512ER },
2636     { "-mavx512cd",	OPTION_MASK_ISA_AVX512CD },
2637     { "-mavx512pf",	OPTION_MASK_ISA_AVX512PF },
2638     { "-mavx512dq",	OPTION_MASK_ISA_AVX512DQ },
2639     { "-mavx512bw",	OPTION_MASK_ISA_AVX512BW },
2640     { "-mavx512vl",	OPTION_MASK_ISA_AVX512VL },
2641     { "-mavx512ifma",	OPTION_MASK_ISA_AVX512IFMA },
2642     { "-mavx512vbmi",	OPTION_MASK_ISA_AVX512VBMI },
2643     { "-msse4a",	OPTION_MASK_ISA_SSE4A },
2644     { "-msse4.2",	OPTION_MASK_ISA_SSE4_2 },
2645     { "-msse4.1",	OPTION_MASK_ISA_SSE4_1 },
2646     { "-mssse3",	OPTION_MASK_ISA_SSSE3 },
2647     { "-msse3",		OPTION_MASK_ISA_SSE3 },
2648     { "-msse2",		OPTION_MASK_ISA_SSE2 },
2649     { "-msse",		OPTION_MASK_ISA_SSE },
2650     { "-m3dnow",	OPTION_MASK_ISA_3DNOW },
2651     { "-m3dnowa",	OPTION_MASK_ISA_3DNOW_A },
2652     { "-mmmx",		OPTION_MASK_ISA_MMX },
2653     { "-mabm",		OPTION_MASK_ISA_ABM },
2654     { "-mbmi",		OPTION_MASK_ISA_BMI },
2655     { "-mbmi2",		OPTION_MASK_ISA_BMI2 },
2656     { "-mlzcnt",	OPTION_MASK_ISA_LZCNT },
2657     { "-mhle",		OPTION_MASK_ISA_HLE },
2658     { "-mfxsr",		OPTION_MASK_ISA_FXSR },
2659     { "-mrdseed",	OPTION_MASK_ISA_RDSEED },
2660     { "-mprfchw",	OPTION_MASK_ISA_PRFCHW },
2661     { "-madx",		OPTION_MASK_ISA_ADX },
2662     { "-mtbm",		OPTION_MASK_ISA_TBM },
2663     { "-mpopcnt",	OPTION_MASK_ISA_POPCNT },
2664     { "-mmovbe",	OPTION_MASK_ISA_MOVBE },
2665     { "-mcrc32",	OPTION_MASK_ISA_CRC32 },
2666     { "-maes",		OPTION_MASK_ISA_AES },
2667     { "-msha",		OPTION_MASK_ISA_SHA },
2668     { "-mpclmul",	OPTION_MASK_ISA_PCLMUL },
2669     { "-mfsgsbase",	OPTION_MASK_ISA_FSGSBASE },
2670     { "-mrdrnd",	OPTION_MASK_ISA_RDRND },
2671     { "-mf16c",		OPTION_MASK_ISA_F16C },
2672     { "-mrtm",		OPTION_MASK_ISA_RTM },
2673     { "-mxsave",	OPTION_MASK_ISA_XSAVE },
2674     { "-mxsaveopt",	OPTION_MASK_ISA_XSAVEOPT },
2675     { "-mprefetchwt1",	OPTION_MASK_ISA_PREFETCHWT1 },
2676     { "-mclflushopt",   OPTION_MASK_ISA_CLFLUSHOPT },
2677     { "-mxsavec",	OPTION_MASK_ISA_XSAVEC },
2678     { "-mxsaves",	OPTION_MASK_ISA_XSAVES },
2679     { "-mmpx",          OPTION_MASK_ISA_MPX },
2680     { "-mclwb",		OPTION_MASK_ISA_CLWB },
2681     { "-mpcommit",	OPTION_MASK_ISA_PCOMMIT },
2682     { "-mmwaitx",	OPTION_MASK_ISA_MWAITX  },
2683   };
2684 
2685   /* Flag options.  */
2686   static struct ix86_target_opts flag_opts[] =
2687   {
2688     { "-m128bit-long-double",		MASK_128BIT_LONG_DOUBLE },
2689     { "-mlong-double-128",		MASK_LONG_DOUBLE_128 },
2690     { "-mlong-double-64",		MASK_LONG_DOUBLE_64 },
2691     { "-m80387",			MASK_80387 },
2692     { "-maccumulate-outgoing-args",	MASK_ACCUMULATE_OUTGOING_ARGS },
2693     { "-malign-double",			MASK_ALIGN_DOUBLE },
2694     { "-mcld",				MASK_CLD },
2695     { "-mfp-ret-in-387",		MASK_FLOAT_RETURNS },
2696     { "-mieee-fp",			MASK_IEEE_FP },
2697     { "-minline-all-stringops",		MASK_INLINE_ALL_STRINGOPS },
2698     { "-minline-stringops-dynamically",	MASK_INLINE_STRINGOPS_DYNAMICALLY },
2699     { "-mms-bitfields",			MASK_MS_BITFIELD_LAYOUT },
2700     { "-mno-align-stringops",		MASK_NO_ALIGN_STRINGOPS },
2701     { "-mno-fancy-math-387",		MASK_NO_FANCY_MATH_387 },
2702     { "-mno-push-args",			MASK_NO_PUSH_ARGS },
2703     { "-mno-red-zone",			MASK_NO_RED_ZONE },
2704     { "-momit-leaf-frame-pointer",	MASK_OMIT_LEAF_FRAME_POINTER },
2705     { "-mrecip",			MASK_RECIP },
2706     { "-mrtd",				MASK_RTD },
2707     { "-msseregparm",			MASK_SSEREGPARM },
2708     { "-mstack-arg-probe",		MASK_STACK_PROBE },
2709     { "-mtls-direct-seg-refs",		MASK_TLS_DIRECT_SEG_REFS },
2710     { "-mvect8-ret-in-mem",		MASK_VECT8_RETURNS },
2711     { "-m8bit-idiv",			MASK_USE_8BIT_IDIV },
2712     { "-mvzeroupper",			MASK_VZEROUPPER },
2713     { "-mavx256-split-unaligned-load",	MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2714     { "-mavx256-split-unaligned-store",	MASK_AVX256_SPLIT_UNALIGNED_STORE},
2715     { "-mprefer-avx128",		MASK_PREFER_AVX128},
2716   };
2717 
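  /* Six extra slots: -march=, -mtune=, the ABI switch, -mfpmath=, and
     the two "(other ...)" catch-all entries.  */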
2718   const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2719 
2720   char isa_other[40];
2721   char target_other[40];
2722   unsigned num = 0;
2723   unsigned i, j;
2724   char *ret;
2725   char *ptr;
2726   size_t len;
2727   size_t line_len;
2728   size_t sep_len;
2729   const char *abi;
2730 
2731   memset (opts, '\0', sizeof (opts));
2732 
2733   /* Add -march= option.  */
2734   if (arch)
2735     {
2736       opts[num][0] = "-march=";
2737       opts[num++][1] = arch;
2738     }
2739 
2740   /* Add -mtune= option.  */
2741   if (tune)
2742     {
2743       opts[num][0] = "-mtune=";
2744       opts[num++][1] = tune;
2745     }
2746 
2747   /* Add -m32/-m64/-mx32.  */
2748   if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2749     {
2750       if ((isa & OPTION_MASK_ABI_64) != 0)
2751 	abi = "-m64";
2752       else
2753 	abi = "-mx32";
2754       isa &= ~ (OPTION_MASK_ISA_64BIT
2755 		| OPTION_MASK_ABI_64
2756 		| OPTION_MASK_ABI_X32);
2757     }
2758   else
2759     abi = "-m32";
2760   opts[num++][0] = abi;
2761 
2762   /* Pick out the options in isa options.  */
2763   for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2764     {
2765       if ((isa & isa_opts[i].mask) != 0)
2766 	{
2767 	  opts[num++][0] = isa_opts[i].option;
2768 	  isa &= ~ isa_opts[i].mask;
2769 	}
2770     }
2771 
2772   if (isa && add_nl_p)
2773     {
2774       opts[num++][0] = isa_other;
2775       sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2776 	       isa);
2777     }
2778 
2779   /* Add flag options.  */
2780   for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2781     {
2782       if ((flags & flag_opts[i].mask) != 0)
2783 	{
2784 	  opts[num++][0] = flag_opts[i].option;
2785 	  flags &= ~ flag_opts[i].mask;
2786 	}
2787     }
2788 
2789   if (flags && add_nl_p)
2790     {
2791       opts[num++][0] = target_other;
2792       sprintf (target_other, "(other flags: %#x)", flags);
2793     }
2794 
2795   /* Add -fpmath= option.  */
2796   if (fpmath)
2797     {
2798       opts[num][0] = "-mfpmath=";
2799       switch ((int) fpmath)
2800 	{
2801 	case FPMATH_387:
2802 	  opts[num++][1] = "387";
2803 	  break;
2804 
2805 	case FPMATH_SSE:
2806 	  opts[num++][1] = "sse";
2807 	  break;
2808 
2809 	case FPMATH_387 | FPMATH_SSE:
2810 	  opts[num++][1] = "sse+387";
2811 	  break;
2812 
2813 	default:
2814 	  gcc_unreachable ();
2815 	}
2816     }
2817 
2818   /* Any options?  */
2819   if (num == 0)
2820     return NULL;
2821 
2822   gcc_assert (num < ARRAY_SIZE (opts));
2823 
2824   /* Size the string.  */
2825   len = 0;
2826   sep_len = (add_nl_p) ? 3 : 1;
2827   for (i = 0; i < num; i++)
2828     {
2829       len += sep_len;
2830       for (j = 0; j < 2; j++)
2831 	if (opts[i][j])
2832 	  len += strlen (opts[i][j]);
2833     }
2834 
2835   /* Build the string.  */
2836   ret = ptr = (char *) xmalloc (len);
2837   line_len = 0;
2838 
2839   for (i = 0; i < num; i++)
2840     {
2841       size_t len2[2];
2842 
2843       for (j = 0; j < 2; j++)
2844 	len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2845 
2846       if (i != 0)
2847 	{
2848 	  *ptr++ = ' ';
2849 	  line_len++;
2850 
2851 	  if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2852 	    {
2853 	      *ptr++ = '\\';
2854 	      *ptr++ = '\n';
2855 	      line_len = 0;
2856 	    }
2857 	}
2858 
2859       for (j = 0; j < 2; j++)
2860 	if (opts[i][j])
2861 	  {
2862 	    memcpy (ptr, opts[i][j], len2[j]);
2863 	    ptr += len2[j];
2864 	    line_len += len2[j];
2865 	  }
2866     }
2867 
2868   *ptr = '\0';
2869   gcc_assert (ret + len >= ptr);
2870 
2871   return ret;
2872 }
2873 
/* Return true if profiling code should be emitted before the prologue,
   false otherwise.
   Note: for x86, the "hotfix" (hot-patching) case is not supported and
   is diagnosed.  */
2877 static bool
2878 ix86_profile_before_prologue (void)
2879 {
2880   return flag_fentry != 0;
2881 }
2882 
2883 /* Function that is callable from the debugger to print the current
2884    options.  */
2885 void ATTRIBUTE_UNUSED
2886 ix86_debug_options (void)
2887 {
2888   char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2889 				   ix86_arch_string, ix86_tune_string,
2890 				   ix86_fpmath, true);
2891 
2892   if (opts)
2893     {
2894       fprintf (stderr, "%s\n\n", opts);
2895       free (opts);
2896     }
2897   else
2898     fputs ("<no options>\n\n", stderr);
2899 
2900   return;
2901 }
2902 
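/* Names of the stringop algorithms, generated from stringop.def so that
   they stay in sync with enum stringop_alg.  */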
2903 static const char *stringop_alg_names[] = {
2904 #define DEF_ENUM
2905 #define DEF_ALG(alg, name) #name,
2906 #include "stringop.def"
2907 #undef DEF_ENUM
2908 #undef DEF_ALG
2909 };
2910 
2911 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2912    The string is of the following form (or comma separated list of it):
2913 
2914      strategy_alg:max_size:[align|noalign]
2915 
2916    where the full size range for the strategy is either [0, max_size] or
   where the full size range for the strategy is either [0, max_size] or
   [min_size, max_size], with min_size being the preceding range's
   max_size + 1.  The last size range must have max_size == -1.
2920    Examples:
2921 
2922     1.
2923        -mmemcpy-strategy=libcall:-1:noalign
2924 
2925       this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2926 
2927 
2928    2.
2929       -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2930 
      This tells the compiler to use the following strategy for memset:
      1) when the expected size is in [1, 16], use the rep_8byte strategy;
      2) when the size is in [17, 2048], use vector_loop;
      3) when the size is > 2048, use libcall.  */
2935 
2936 struct stringop_size_range
2937 {
2938   int max;
2939   stringop_alg alg;
2940   bool noalign;
2941 };
2942 
2943 static void
2944 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2945 {
2946   const struct stringop_algs *default_algs;
2947   stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2948   char *curr_range_str, *next_range_str;
2949   int i = 0, n = 0;
2950 
2951   if (is_memset)
2952     default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2953   else
2954     default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2955 
2956   curr_range_str = strategy_str;
2957 
2958   do
2959     {
2960       int maxs;
2961       char alg_name[128];
2962       char align[16];
2963       next_range_str = strchr (curr_range_str, ',');
2964       if (next_range_str)
2965         *next_range_str++ = '\0';
2966 
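      /* The field widths keep sscanf from overflowing alg_name and
         align.  */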
2967       if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2968                        alg_name, &maxs, align))
2969         {
          error ("wrong argument %s to option %s", curr_range_str,
                 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2972           return;
2973         }
2974 
2975       if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2976         {
2977           error ("size ranges of option %s should be increasing",
                 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2979           return;
2980         }
2981 
2982       for (i = 0; i < last_alg; i++)
2983 	if (!strcmp (alg_name, stringop_alg_names[i]))
2984 	  break;
2985 
2986       if (i == last_alg)
2987         {
2988           error ("wrong stringop strategy name %s specified for option %s",
2989                  alg_name,
                 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2991           return;
2992         }
2993 
2994       if ((stringop_alg) i == rep_prefix_8_byte
2995 	  && !TARGET_64BIT)
2996 	{
2997 	  /* rep; movq isn't available in 32-bit code.  */
2998 	  error ("stringop strategy name %s specified for option %s "
2999 		 "not supported for 32-bit code",
3000                  alg_name,
                 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
3002 	  return;
3003 	}
3004 
      /* Check the bound before storing, so an overlong list cannot
         overflow input_ranges.  */
      if (n >= MAX_STRINGOP_ALGS)
        {
          error ("too many size ranges specified in option %s",
                 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
          return;
        }

      input_ranges[n].max = maxs;
3006       input_ranges[n].alg = (stringop_alg) i;
3007       if (!strcmp (align, "align"))
3008         input_ranges[n].noalign = false;
3009       else if (!strcmp (align, "noalign"))
3010         input_ranges[n].noalign = true;
3011       else
3012         {
3013           error ("unknown alignment %s specified for option %s",
                 align, is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
3015           return;
3016         }
3017       n++;
3018       curr_range_str = next_range_str;
3019     }
3020   while (curr_range_str);
3021 
3022   if (input_ranges[n - 1].max != -1)
3023     {
3024       error ("the max value for the last size range should be -1"
3025              " for option %s",
             is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
3027       return;
3028     }
3029 
3036 
3037   /* Now override the default algs array.  */
3038   for (i = 0; i < n; i++)
3039     {
3040       *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3041       *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3042           = input_ranges[i].alg;
3043       *const_cast<int *>(&default_algs->size[i].noalign)
3044           = input_ranges[i].noalign;
3045     }
3046 }
3047 
3048 
/* Parse the -mtune-ctrl= option.  When DUMP is true,
   print the features that are explicitly set.  */
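/* For example, -mtune-ctrl=feature1,^feature2 would set feature1 and
   clear feature2; the valid names are those listed in
   ix86_tune_feature_names.  */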
3051 
3052 static void
3053 parse_mtune_ctrl_str (bool dump)
3054 {
3055   if (!ix86_tune_ctrl_string)
3056     return;
3057 
3058   char *next_feature_string = NULL;
3059   char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3060   char *orig = curr_feature_string;
3061   int i;
3062   do
3063     {
3064       bool clear = false;
3065 
3066       next_feature_string = strchr (curr_feature_string, ',');
3067       if (next_feature_string)
3068         *next_feature_string++ = '\0';
3069       if (*curr_feature_string == '^')
3070         {
3071           curr_feature_string++;
3072           clear = true;
3073         }
3074       for (i = 0; i < X86_TUNE_LAST; i++)
3075         {
3076           if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3077             {
3078               ix86_tune_features[i] = !clear;
3079               if (dump)
3080                 fprintf (stderr, "Explicitly %s feature %s\n",
3081                          clear ? "clear" : "set", ix86_tune_feature_names[i]);
3082               break;
3083             }
3084         }
3085       if (i == X86_TUNE_LAST)
        error ("unknown parameter to option -mtune-ctrl: %s",
3087                clear ? curr_feature_string - 1 : curr_feature_string);
3088       curr_feature_string = next_feature_string;
3089     }
3090   while (curr_feature_string);
3091   free (orig);
3092 }
3093 
3094 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3095    processor type.  */
3096 
3097 static void
3098 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3099 {
3100   unsigned int ix86_tune_mask = 1u << ix86_tune;
3101   int i;
3102 
3103   for (i = 0; i < X86_TUNE_LAST; ++i)
3104     {
3105       if (ix86_tune_no_default)
3106         ix86_tune_features[i] = 0;
3107       else
3108         ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3109     }
3110 
3111   if (dump)
3112     {
3113       fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3114       for (i = 0; i < X86_TUNE_LAST; i++)
3115         fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3116                  ix86_tune_features[i] ? "on" : "off");
3117     }
3118 
3119   parse_mtune_ctrl_str (dump);
3120 }
3121 
3122 
3123 /* Override various settings based on options.  If MAIN_ARGS_P, the
3124    options are from the command line, otherwise they are from
3125    attributes.  */
3126 
3127 static void
3128 ix86_option_override_internal (bool main_args_p,
3129 			       struct gcc_options *opts,
3130 			       struct gcc_options *opts_set)
3131 {
3132   int i;
3133   unsigned int ix86_arch_mask;
3134   const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3135   const char *prefix;
3136   const char *suffix;
3137   const char *sw;
3138 
3139 #define PTA_3DNOW	 	(HOST_WIDE_INT_1 << 0)
3140 #define PTA_3DNOW_A	 	(HOST_WIDE_INT_1 << 1)
3141 #define PTA_64BIT		(HOST_WIDE_INT_1 << 2)
3142 #define PTA_ABM			(HOST_WIDE_INT_1 << 3)
3143 #define PTA_AES		 	(HOST_WIDE_INT_1 << 4)
3144 #define PTA_AVX			(HOST_WIDE_INT_1 << 5)
3145 #define PTA_BMI		 	(HOST_WIDE_INT_1 << 6)
3146 #define PTA_CX16		(HOST_WIDE_INT_1 << 7)
3147 #define PTA_F16C		(HOST_WIDE_INT_1 << 8)
3148 #define PTA_FMA			(HOST_WIDE_INT_1 << 9)
3149 #define PTA_FMA4	 	(HOST_WIDE_INT_1 << 10)
3150 #define PTA_FSGSBASE		(HOST_WIDE_INT_1 << 11)
3151 #define PTA_LWP		 	(HOST_WIDE_INT_1 << 12)
3152 #define PTA_LZCNT	 	(HOST_WIDE_INT_1 << 13)
3153 #define PTA_MMX			(HOST_WIDE_INT_1 << 14)
3154 #define PTA_MOVBE		(HOST_WIDE_INT_1 << 15)
3155 #define PTA_NO_SAHF		(HOST_WIDE_INT_1 << 16)
3156 #define PTA_PCLMUL		(HOST_WIDE_INT_1 << 17)
3157 #define PTA_POPCNT		(HOST_WIDE_INT_1 << 18)
3158 #define PTA_PREFETCH_SSE	(HOST_WIDE_INT_1 << 19)
3159 #define PTA_RDRND	 	(HOST_WIDE_INT_1 << 20)
3160 #define PTA_SSE			(HOST_WIDE_INT_1 << 21)
3161 #define PTA_SSE2		(HOST_WIDE_INT_1 << 22)
3162 #define PTA_SSE3		(HOST_WIDE_INT_1 << 23)
3163 #define PTA_SSE4_1	 	(HOST_WIDE_INT_1 << 24)
3164 #define PTA_SSE4_2	 	(HOST_WIDE_INT_1 << 25)
3165 #define PTA_SSE4A		(HOST_WIDE_INT_1 << 26)
3166 #define PTA_SSSE3		(HOST_WIDE_INT_1 << 27)
3167 #define PTA_TBM		 	(HOST_WIDE_INT_1 << 28)
3168 #define PTA_XOP		 	(HOST_WIDE_INT_1 << 29)
3169 #define PTA_AVX2		(HOST_WIDE_INT_1 << 30)
3170 #define PTA_BMI2	 	(HOST_WIDE_INT_1 << 31)
3171 #define PTA_RTM		 	(HOST_WIDE_INT_1 << 32)
3172 #define PTA_HLE			(HOST_WIDE_INT_1 << 33)
3173 #define PTA_PRFCHW		(HOST_WIDE_INT_1 << 34)
3174 #define PTA_RDSEED		(HOST_WIDE_INT_1 << 35)
3175 #define PTA_ADX			(HOST_WIDE_INT_1 << 36)
3176 #define PTA_FXSR		(HOST_WIDE_INT_1 << 37)
3177 #define PTA_XSAVE		(HOST_WIDE_INT_1 << 38)
3178 #define PTA_XSAVEOPT		(HOST_WIDE_INT_1 << 39)
3179 #define PTA_AVX512F		(HOST_WIDE_INT_1 << 40)
3180 #define PTA_AVX512ER		(HOST_WIDE_INT_1 << 41)
3181 #define PTA_AVX512PF		(HOST_WIDE_INT_1 << 42)
3182 #define PTA_AVX512CD		(HOST_WIDE_INT_1 << 43)
3183 #define PTA_MPX			(HOST_WIDE_INT_1 << 44)
3184 #define PTA_SHA			(HOST_WIDE_INT_1 << 45)
3185 #define PTA_PREFETCHWT1		(HOST_WIDE_INT_1 << 46)
3186 #define PTA_CLFLUSHOPT		(HOST_WIDE_INT_1 << 47)
3187 #define PTA_XSAVEC		(HOST_WIDE_INT_1 << 48)
3188 #define PTA_XSAVES		(HOST_WIDE_INT_1 << 49)
3189 #define PTA_AVX512DQ		(HOST_WIDE_INT_1 << 50)
3190 #define PTA_AVX512BW		(HOST_WIDE_INT_1 << 51)
3191 #define PTA_AVX512VL		(HOST_WIDE_INT_1 << 52)
3192 #define PTA_AVX512IFMA		(HOST_WIDE_INT_1 << 53)
3193 #define PTA_AVX512VBMI		(HOST_WIDE_INT_1 << 54)
3194 #define PTA_CLWB		(HOST_WIDE_INT_1 << 55)
3195 #define PTA_PCOMMIT		(HOST_WIDE_INT_1 << 56)
3196 #define PTA_MWAITX		(HOST_WIDE_INT_1 << 57)
3197 
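/* Composite flag sets for the CPU generations; later entries build on
   the earlier ones.  */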
3198 #define PTA_CORE2 \
3199   (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3200    | PTA_CX16 | PTA_FXSR)
3201 #define PTA_NEHALEM \
3202   (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3203 #define PTA_WESTMERE \
3204   (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3205 #define PTA_SANDYBRIDGE \
3206   (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3207 #define PTA_IVYBRIDGE \
3208   (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3209 #define PTA_HASWELL \
3210   (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3211    | PTA_FMA | PTA_MOVBE | PTA_HLE)
3212 #define PTA_BROADWELL \
3213   (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3214 #define PTA_KNL \
3215   (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3216 #define PTA_BONNELL \
3217   (PTA_CORE2 | PTA_MOVBE)
3218 #define PTA_SILVERMONT \
3219   (PTA_WESTMERE | PTA_MOVBE)
3220 
/* If the PTA_* bits reach 64, the flags field in struct pta below needs
   to be widened (PTA_MWAITX above already uses bit 57).  */
3222 
3223   static struct pta
3224     {
3225       const char *const name;		/* processor name or nickname.  */
3226       const enum processor_type processor;
3227       const enum attr_cpu schedule;
3228       const unsigned HOST_WIDE_INT flags;
3229     }
3230   const processor_alias_table[] =
3231     {
3232       {"i386", PROCESSOR_I386, CPU_NONE, 0},
3233       {"i486", PROCESSOR_I486, CPU_NONE, 0},
3234       {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3235       {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3236       {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3237       {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3238       {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3239       {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3240       {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3241 	PTA_MMX | PTA_SSE | PTA_FXSR},
3242       {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3243       {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3244       {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3245       {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3246 	PTA_MMX | PTA_SSE | PTA_FXSR},
3247       {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3248 	PTA_MMX | PTA_SSE | PTA_FXSR},
3249       {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3250 	PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3251       {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
	PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3253       {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3254 	PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3255       {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3256 	PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3257       {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3258 	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3259 	| PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3260       {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3261       {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3262       {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3263       {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3264       {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3265 	PTA_SANDYBRIDGE},
3266       {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3267 	PTA_SANDYBRIDGE},
3268       {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3269 	PTA_IVYBRIDGE},
3270       {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3271 	PTA_IVYBRIDGE},
3272       {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3273       {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3274       {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3275       {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3276       {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3277       {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3278       {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3279       {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL},
3280       {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3281       {"geode", PROCESSOR_GEODE, CPU_GEODE,
3282 	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3283       {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3284       {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3285       {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3286       {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3287 	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3288       {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3289 	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3290       {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3291 	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3292       {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3293 	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3294       {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3295 	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3296       {"x86-64", PROCESSOR_K8, CPU_K8,
3297 	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3298       {"k8", PROCESSOR_K8, CPU_K8,
3299 	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3300 	| PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3301       {"k8-sse3", PROCESSOR_K8, CPU_K8,
3302 	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3303 	| PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3304       {"opteron", PROCESSOR_K8, CPU_K8,
3305 	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3306 	| PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3307       {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3308 	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3309 	| PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3310       {"athlon64", PROCESSOR_K8, CPU_K8,
3311 	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3312 	| PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3313       {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3314 	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3315 	| PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3316       {"athlon-fx", PROCESSOR_K8, CPU_K8,
3317 	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3318 	| PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3319       {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3320 	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3321 	| PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3322       {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3323 	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3324 	| PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3325       {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3326 	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3327 	| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3328 	| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3329 	| PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3330       {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3331 	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3332 	| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3333 	| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3334 	| PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3335 	| PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3336       {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3337 	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3338 	| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3339 	| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3340 	| PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3341 	| PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3342 	| PTA_XSAVEOPT | PTA_FSGSBASE},
      {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3344 	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3345 	| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3346 	| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3347 	| PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3348 	| PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3349 	| PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3350 	| PTA_MOVBE | PTA_MWAITX},
3351       {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	| PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3354 	| PTA_FXSR | PTA_XSAVE},
3355       {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	| PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3358 	| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3359 	| PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3360 	| PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3361 
3362       {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3363 	PTA_64BIT
3364 	| PTA_HLE /* flags are only used for -march switch.  */ },
3365     };
3366 
3367   /* -mrecip options.  */
3368   static struct
3369     {
3370       const char *string;           /* option name */
3371       unsigned int mask;            /* mask bits to set */
3372     }
3373   const recip_options[] =
3374     {
3375       { "all",       RECIP_MASK_ALL },
3376       { "none",      RECIP_MASK_NONE },
3377       { "div",       RECIP_MASK_DIV },
3378       { "sqrt",      RECIP_MASK_SQRT },
3379       { "vec-div",   RECIP_MASK_VEC_DIV },
3380       { "vec-sqrt",  RECIP_MASK_VEC_SQRT },
3381     };
3382 
3383   int const pta_size = ARRAY_SIZE (processor_alias_table);
3384 
3385   /* Set up prefix/suffix so the error messages refer to either the command
3386      line argument, or the attribute(target).  */
3387   if (main_args_p)
3388     {
3389       prefix = "-m";
3390       suffix = "";
3391       sw = "switch";
3392     }
3393   else
3394     {
3395       prefix = "option(\"";
3396       suffix = "\")";
3397       sw = "attribute";
3398     }
3399 
3400   /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3401      TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false.  */
3402   if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3403     opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3404 #ifdef TARGET_BI_ARCH
3405   else
3406     {
3407 #if TARGET_BI_ARCH == 1
3408       /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3409 	 is on and OPTION_MASK_ABI_X32 is off.  We turn off
3410 	 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3411 	 -mx32.  */
3412       if (TARGET_X32_P (opts->x_ix86_isa_flags))
3413 	opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3414 #else
3415       /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3416 	 on and OPTION_MASK_ABI_64 is off.  We turn off
3417 	 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3418 	 -m64 or OPTION_MASK_CODE16 is turned on by -m16.  */
3419       if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3420 	  || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3421 	opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3422 #endif
3423     }
3424 #endif
3425 
3426   if (TARGET_X32_P (opts->x_ix86_isa_flags))
3427     {
3428       /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3429 	 OPTION_MASK_ABI_64 for TARGET_X32.  */
3430       opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3431       opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3432     }
3433   else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3434     opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3435 				| OPTION_MASK_ABI_X32
3436 				| OPTION_MASK_ABI_64);
3437   else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3438     {
3439       /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3440 	 OPTION_MASK_ABI_X32 for TARGET_LP64.  */
3441       opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3442       opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3443     }
3444 
3445 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3446   SUBTARGET_OVERRIDE_OPTIONS;
3447 #endif
3448 
3449 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3450   SUBSUBTARGET_OVERRIDE_OPTIONS;
3451 #endif
3452 
3453   /* -fPIC is the default for x86_64.  */
3454   if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3455     opts->x_flag_pic = 2;
3456 
3457   /* Need to check -mtune=generic first.  */
3458   if (opts->x_ix86_tune_string)
3459     {
      /* As special support for cross compilers we read -mtune=native
	 as -mtune=generic.  With native compilers we won't see
	 -mtune=native, as it was changed by the driver.  */
3463       if (!strcmp (opts->x_ix86_tune_string, "native"))
3464 	{
3465 	  opts->x_ix86_tune_string = "generic";
3466 	}
3467       else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3468         warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3469                  "%stune=k8%s or %stune=generic%s instead as appropriate",
3470                  prefix, suffix, prefix, suffix, prefix, suffix);
3471     }
3472   else
3473     {
3474       if (opts->x_ix86_arch_string)
3475 	opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3476       if (!opts->x_ix86_tune_string)
3477 	{
3478 	  opts->x_ix86_tune_string
3479 	    = processor_target_table[TARGET_CPU_DEFAULT].name;
3480 	  ix86_tune_defaulted = 1;
3481 	}
3482 
3483       /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3484 	 or defaulted.  We need to use a sensible tune option.  */
3485       if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3486 	{
3487 	  opts->x_ix86_tune_string = "generic";
3488 	}
3489     }
3490 
3491   if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3492       && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3493     {
3494       /* rep; movq isn't available in 32-bit code.  */
3495       error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3496       opts->x_ix86_stringop_alg = no_stringop;
3497     }
3498 
3499   if (!opts->x_ix86_arch_string)
3500     opts->x_ix86_arch_string
3501       = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3502 	? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3503   else
3504     ix86_arch_specified = 1;
3505 
3506   if (opts_set->x_ix86_pmode)
3507     {
3508       if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3509 	   && opts->x_ix86_pmode == PMODE_SI)
3510 	  || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3511 	       && opts->x_ix86_pmode == PMODE_DI))
3512 	error ("address mode %qs not supported in the %s bit mode",
3513 	       TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3514 	       TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3515     }
3516   else
3517     opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3518 			 ? PMODE_DI : PMODE_SI;
3519 
3520   if (!opts_set->x_ix86_abi)
3521     opts->x_ix86_abi = DEFAULT_ABI;
3522 
  /* For targets using the MS ABI, enable MS extensions unless they were
     explicitly turned off.  For non-MS ABI targets we turn this option
     off.  */
3526   if (!opts_set->x_flag_ms_extensions)
3527     opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3528 
3529   if (opts_set->x_ix86_cmodel)
3530     {
3531       switch (opts->x_ix86_cmodel)
3532 	{
3533 	case CM_SMALL:
3534 	case CM_SMALL_PIC:
3535 	  if (opts->x_flag_pic)
3536 	    opts->x_ix86_cmodel = CM_SMALL_PIC;
3537 	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3538 	    error ("code model %qs not supported in the %s bit mode",
3539 		   "small", "32");
3540 	  break;
3541 
3542 	case CM_MEDIUM:
3543 	case CM_MEDIUM_PIC:
3544 	  if (opts->x_flag_pic)
3545 	    opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3546 	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3547 	    error ("code model %qs not supported in the %s bit mode",
3548 		   "medium", "32");
3549 	  else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3550 	    error ("code model %qs not supported in x32 mode",
3551 		   "medium");
3552 	  break;
3553 
3554 	case CM_LARGE:
3555 	case CM_LARGE_PIC:
3556 	  if (opts->x_flag_pic)
3557 	    opts->x_ix86_cmodel = CM_LARGE_PIC;
3558 	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3559 	    error ("code model %qs not supported in the %s bit mode",
3560 		   "large", "32");
3561 	  else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3562 	    error ("code model %qs not supported in x32 mode",
3563 		   "large");
3564 	  break;
3565 
3566 	case CM_32:
3567 	  if (opts->x_flag_pic)
	    error ("code model %qs does not support PIC mode", "32");
3569 	  if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3570 	    error ("code model %qs not supported in the %s bit mode",
3571 		   "32", "64");
3572 	  break;
3573 
3574 	case CM_KERNEL:
3575 	  if (opts->x_flag_pic)
3576 	    {
	      error ("code model %qs does not support PIC mode", "kernel");
3578 	      opts->x_ix86_cmodel = CM_32;
3579 	    }
3580 	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3581 	    error ("code model %qs not supported in the %s bit mode",
3582 		   "kernel", "32");
3583 	  break;
3584 
3585 	default:
3586 	  gcc_unreachable ();
3587 	}
3588     }
3589   else
3590     {
3591       /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3592 	 use of rip-relative addressing.  This eliminates fixups that
3593 	 would otherwise be needed if this object is to be placed in a
3594 	 DLL, and is essentially just as efficient as direct addressing.  */
3595       if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3596 	  && (TARGET_RDOS || TARGET_PECOFF))
	{
	  opts->x_ix86_cmodel = CM_MEDIUM_PIC;
	  opts->x_flag_pic = 1;
	}
3598       else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3599 	opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3600       else
3601 	opts->x_ix86_cmodel = CM_32;
3602     }
3603   if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3604     {
3605       error ("-masm=intel not supported in this configuration");
3606       opts->x_ix86_asm_dialect = ASM_ATT;
3607     }
3608   if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3609       != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3610     sorry ("%i-bit mode not compiled in",
3611 	   (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3612 
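  /* Translate the PTA_* capability bits of the selected -march CPU into
     OPTION_MASK_ISA_* flags, honoring any bits the user set
     explicitly.  */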
3613   for (i = 0; i < pta_size; i++)
3614     if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3615       {
3616 	ix86_schedule = processor_alias_table[i].schedule;
3617 	ix86_arch = processor_alias_table[i].processor;
3618 	/* Default cpu tuning to the architecture.  */
3619 	ix86_tune = ix86_arch;
3620 
3621 	if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3622 	    && !(processor_alias_table[i].flags & PTA_64BIT))
3623 	  error ("CPU you selected does not support x86-64 "
3624 		 "instruction set");
3625 
3626 	if (processor_alias_table[i].flags & PTA_MMX
3627 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3628 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3629 	if (processor_alias_table[i].flags & PTA_3DNOW
3630 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3631 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3632 	if (processor_alias_table[i].flags & PTA_3DNOW_A
3633 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3634 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3635 	if (processor_alias_table[i].flags & PTA_SSE
3636 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3637 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3638 	if (processor_alias_table[i].flags & PTA_SSE2
3639 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3640 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3641 	if (processor_alias_table[i].flags & PTA_SSE3
3642 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3643 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3644 	if (processor_alias_table[i].flags & PTA_SSSE3
3645 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3646 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3647 	if (processor_alias_table[i].flags & PTA_SSE4_1
3648 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3649 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3650 	if (processor_alias_table[i].flags & PTA_SSE4_2
3651 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3652 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3653 	if (processor_alias_table[i].flags & PTA_AVX
3654 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3655 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3656 	if (processor_alias_table[i].flags & PTA_AVX2
3657 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3658 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3659 	if (processor_alias_table[i].flags & PTA_FMA
3660 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3661 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3662 	if (processor_alias_table[i].flags & PTA_SSE4A
3663 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3664 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3665 	if (processor_alias_table[i].flags & PTA_FMA4
3666 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3667 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3668 	if (processor_alias_table[i].flags & PTA_XOP
3669 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3670 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3671 	if (processor_alias_table[i].flags & PTA_LWP
3672 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3673 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3674 	if (processor_alias_table[i].flags & PTA_ABM
3675 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3676 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3677 	if (processor_alias_table[i].flags & PTA_BMI
3678 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3679 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3680 	if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3681 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3682 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3683 	if (processor_alias_table[i].flags & PTA_TBM
3684 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3685 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3686 	if (processor_alias_table[i].flags & PTA_BMI2
3687 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3688 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3689 	if (processor_alias_table[i].flags & PTA_CX16
3690 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3691 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3692 	if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3693 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3694 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
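	/* LAHF/SAHF are always available in 32-bit mode; in 64-bit mode
	   they are missing on early x86-64 CPUs, which PTA_NO_SAHF
	   marks.  */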
3695 	if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3696 	    && (processor_alias_table[i].flags & PTA_NO_SAHF))
3697 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3698 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3699 	if (processor_alias_table[i].flags & PTA_MOVBE
3700 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3701 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
	if (processor_alias_table[i].flags & PTA_AES
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
	if (processor_alias_table[i].flags & PTA_SHA
	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3708 	if (processor_alias_table[i].flags & PTA_PCLMUL
3709 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3710 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3711 	if (processor_alias_table[i].flags & PTA_FSGSBASE
3712 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3713 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3714 	if (processor_alias_table[i].flags & PTA_RDRND
3715 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3716 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3717 	if (processor_alias_table[i].flags & PTA_F16C
3718 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3719 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3720 	if (processor_alias_table[i].flags & PTA_RTM
3721 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3722 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3723 	if (processor_alias_table[i].flags & PTA_HLE
3724 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3725 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3726 	if (processor_alias_table[i].flags & PTA_PRFCHW
3727 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3728 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3729 	if (processor_alias_table[i].flags & PTA_RDSEED
3730 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3731 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3732 	if (processor_alias_table[i].flags & PTA_ADX
3733 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3734 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3735 	if (processor_alias_table[i].flags & PTA_FXSR
3736 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3737 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3738 	if (processor_alias_table[i].flags & PTA_XSAVE
3739 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3740 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3741 	if (processor_alias_table[i].flags & PTA_XSAVEOPT
3742 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3743 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3744 	if (processor_alias_table[i].flags & PTA_AVX512F
3745 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3746 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3747 	if (processor_alias_table[i].flags & PTA_AVX512ER
3748 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3749 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3750 	if (processor_alias_table[i].flags & PTA_AVX512PF
3751 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3752 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3753 	if (processor_alias_table[i].flags & PTA_AVX512CD
3754 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3755 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3756 	if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3757 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3758 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3759 	if (processor_alias_table[i].flags & PTA_PCOMMIT
3760 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3761 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3762 	if (processor_alias_table[i].flags & PTA_CLWB
3763 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3764 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3765 	if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3766 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3767 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3768 	if (processor_alias_table[i].flags & PTA_XSAVEC
3769 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3770 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3771 	if (processor_alias_table[i].flags & PTA_XSAVES
3772 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3773 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3774 	if (processor_alias_table[i].flags & PTA_AVX512DQ
3775 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3776 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3777 	if (processor_alias_table[i].flags & PTA_AVX512BW
3778 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3779 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3780 	if (processor_alias_table[i].flags & PTA_AVX512VL
3781 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3782 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3783         if (processor_alias_table[i].flags & PTA_MPX
3784             && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3785           opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3786 	if (processor_alias_table[i].flags & PTA_AVX512VBMI
3787 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3788 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3789 	if (processor_alias_table[i].flags & PTA_AVX512IFMA
3790 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3791 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3792 	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3793 	  x86_prefetch_sse = true;
3794 	if (processor_alias_table[i].flags & PTA_MWAITX
3795 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
3796 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
3797 
3798 	break;
3799       }
3800 
3801   if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3802     error ("Intel MPX does not support x32");
3806 
3807   if (!strcmp (opts->x_ix86_arch_string, "generic"))
3808     error ("generic CPU can be used only for %stune=%s %s",
3809 	   prefix, suffix, sw);
3810   else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3811     error ("intel CPU can be used only for %stune=%s %s",
3812 	   prefix, suffix, sw);
3813   else if (i == pta_size)
3814     error ("bad value (%s) for %sarch=%s %s",
3815 	   opts->x_ix86_arch_string, prefix, suffix, sw);
3816 
3817   ix86_arch_mask = 1u << ix86_arch;
3818   for (i = 0; i < X86_ARCH_LAST; ++i)
3819     ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3820 
3821   for (i = 0; i < pta_size; i++)
3822     if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3823       {
3824 	ix86_schedule = processor_alias_table[i].schedule;
3825 	ix86_tune = processor_alias_table[i].processor;
3826 	if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3827 	  {
3828 	    if (!(processor_alias_table[i].flags & PTA_64BIT))
3829 	      {
3830 		if (ix86_tune_defaulted)
3831 		  {
3832 		    opts->x_ix86_tune_string = "x86-64";
3833 		    for (i = 0; i < pta_size; i++)
3834 		      if (! strcmp (opts->x_ix86_tune_string,
3835 				    processor_alias_table[i].name))
3836 			break;
3837 		    ix86_schedule = processor_alias_table[i].schedule;
3838 		    ix86_tune = processor_alias_table[i].processor;
3839 		  }
3840 		else
3841 		  error ("CPU you selected does not support x86-64 "
3842 			 "instruction set");
3843 	      }
3844 	  }
3845 	/* Intel CPUs have always interpreted SSE prefetch instructions as
3846 	   NOPs; so, we can enable SSE prefetch instructions even when
3847 	   -mtune (rather than -march) points us to a processor that has them.
3848 	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3849 	   higher processors.  */
3850 	if (TARGET_CMOV
3851 	    && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3852 	  x86_prefetch_sse = true;
3853 	break;
3854       }
3855 
3856   if (ix86_tune_specified && i == pta_size)
3857     error ("bad value (%s) for %stune=%s %s",
3858 	   opts->x_ix86_tune_string, prefix, suffix, sw);
3859 
3860   set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3861 
3862 #ifndef USE_IX86_FRAME_POINTER
3863 #define USE_IX86_FRAME_POINTER 0
3864 #endif
3865 
3866 #ifndef USE_X86_64_FRAME_POINTER
3867 #define USE_X86_64_FRAME_POINTER 0
3868 #endif
3869 
3870   /* Set the default values for switches whose default depends on TARGET_64BIT
3871      in case they weren't overwritten by command line options.  */
3872   if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3873     {
3874       if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3875 	opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3876       if (opts->x_flag_asynchronous_unwind_tables
3877 	  && !opts_set->x_flag_unwind_tables
3878 	  && TARGET_64BIT_MS_ABI)
3879 	opts->x_flag_unwind_tables = 1;
3880       if (opts->x_flag_asynchronous_unwind_tables == 2)
3881 	opts->x_flag_unwind_tables
3882 	  = opts->x_flag_asynchronous_unwind_tables = 1;
3883       if (opts->x_flag_pcc_struct_return == 2)
3884 	opts->x_flag_pcc_struct_return = 0;
3885     }
3886   else
3887     {
3888       if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3889 	opts->x_flag_omit_frame_pointer
3890 	  = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3891       if (opts->x_flag_asynchronous_unwind_tables == 2)
3892 	opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3893       if (opts->x_flag_pcc_struct_return == 2)
3894 	opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3895     }
3896 
3897   ix86_tune_cost = processor_target_table[ix86_tune].cost;
  /* TODO: ix86_cost should be chosen at instruction or function
     granularity so that for cold code we can use size_cost even in
     !optimize_size compilations.  */
3900   if (opts->x_optimize_size)
3901     ix86_cost = &ix86_size_cost;
3902   else
3903     ix86_cost = ix86_tune_cost;
3904 
3905   /* Arrange to set up i386_stack_locals for all functions.  */
3906   init_machine_status = ix86_init_machine_status;
3907 
3908   /* Validate -mregparm= value.  */
3909   if (opts_set->x_ix86_regparm)
3910     {
3911       if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3912 	warning (0, "-mregparm is ignored in 64-bit mode");
3913       if (opts->x_ix86_regparm > REGPARM_MAX)
3914 	{
3915 	  error ("-mregparm=%d is not between 0 and %d",
3916 		 opts->x_ix86_regparm, REGPARM_MAX);
3917 	  opts->x_ix86_regparm = 0;
3918 	}
3919     }
3920   if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3921     opts->x_ix86_regparm = REGPARM_MAX;
3922 
3923   /* Default align_* from the processor table.  */
3924   if (opts->x_align_loops == 0)
3925     {
3926       opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3927       align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3928     }
3929   if (opts->x_align_jumps == 0)
3930     {
3931       opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3932       align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3933     }
3934   if (opts->x_align_functions == 0)
3935     {
3936       opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3937     }
3938 
3939   /* Provide default for -mbranch-cost= value.  */
3940   if (!opts_set->x_ix86_branch_cost)
3941     opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
3942 
3943   if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3944     {
3945       opts->x_target_flags
3946 	|= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3947 
3948       /* Enable by default the SSE and MMX builtins.  Do allow the user to
3949 	 explicitly disable any of these.  In particular, disabling SSE and
3950 	 MMX for kernel code is extremely useful.  */
3951       if (!ix86_arch_specified)
3952 	opts->x_ix86_isa_flags
3953 	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3954 	       | TARGET_SUBTARGET64_ISA_DEFAULT)
3955 	      & ~opts->x_ix86_isa_flags_explicit);
3956 
3957       if (TARGET_RTD_P (opts->x_target_flags))
3958 	warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3959     }
3960   else
3961     {
3962       opts->x_target_flags
3963 	|= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3964 
3965       if (!ix86_arch_specified)
3966         opts->x_ix86_isa_flags
3967 	  |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3968 
3969       /* The i386 ABI does not specify a red zone.  It still makes sense to use
3970          it when the programmer takes care to keep the stack from being clobbered.  */
3971       if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3972         opts->x_target_flags |= MASK_NO_RED_ZONE;
3973     }
3974 
3975   /* Keep nonleaf frame pointers.  */
3976   if (opts->x_flag_omit_frame_pointer)
3977     opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3978   else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3979     opts->x_flag_omit_frame_pointer = 1;
3980 
3981   /* If we're doing fast math, we don't care about comparison order
3982      wrt NaNs.  This lets us use a shorter comparison sequence.  */
3983   if (opts->x_flag_finite_math_only)
3984     opts->x_target_flags &= ~MASK_IEEE_FP;
3985 
3986   /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3987      since the insns won't need emulation.  */
3988   if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3989     opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3990 
3991   /* Likewise, if the target doesn't have a 387, or we've specified
3992      software floating point, don't use 387 inline intrinsics.  */
3993   if (!TARGET_80387_P (opts->x_target_flags))
3994     opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3995 
3996   /* Turn on MMX builtins for -msse.  */
3997   if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3998     opts->x_ix86_isa_flags
3999       |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
4000 
4001   /* Enable SSE prefetch.  */
4002   if (TARGET_SSE_P (opts->x_ix86_isa_flags)
4003       || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
4004     x86_prefetch_sse = true;
4005 
4006   /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1.  */
4007   if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
4008       || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
4009     opts->x_ix86_isa_flags
4010       |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
4011 
4012   /* Enable popcnt instruction for -msse4.2 or -mabm.  */
4013   if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
4014       || TARGET_ABM_P (opts->x_ix86_isa_flags))
4015     opts->x_ix86_isa_flags
4016       |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4017 
4018   /* Enable lzcnt instruction for -mabm.  */
4019   if (TARGET_ABM_P (opts->x_ix86_isa_flags))
4020     opts->x_ix86_isa_flags
4021       |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
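  /* Thus -mabm alone implicitly enables both popcnt and lzcnt, unless the
     user explicitly disabled them (tracked in x_ix86_isa_flags_explicit).  */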
4022 
4023   /* Validate -mpreferred-stack-boundary= value or default it to
4024      PREFERRED_STACK_BOUNDARY_DEFAULT.  */
4025   ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4026   if (opts_set->x_ix86_preferred_stack_boundary_arg)
4027     {
4028       int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4029 		 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4030       int max = (TARGET_SEH ? 4 : 12);
4031 
4032       if (opts->x_ix86_preferred_stack_boundary_arg < min
4033 	  || opts->x_ix86_preferred_stack_boundary_arg > max)
4034 	{
4035 	  if (min == max)
4036 	    error ("-mpreferred-stack-boundary is not supported "
4037 		   "for this target");
4038 	  else
4039 	    error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4040 		   opts->x_ix86_preferred_stack_boundary_arg, min, max);
4041 	}
4042       else
4043 	ix86_preferred_stack_boundary
4044 	  = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4045     }
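  /* Worked example (illustrative): -mpreferred-stack-boundary=4 passes the
     range check above and yields (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128
     bits, i.e. a 16-byte preferred stack alignment.  */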
4046 
4047   /* Set the default value for -mstackrealign.  */
4048   if (opts->x_ix86_force_align_arg_pointer == -1)
4049     opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4050 
4051   ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4052 
4053   /* Validate -mincoming-stack-boundary= value or default it to
4054      MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
4055   ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4056   if (opts_set->x_ix86_incoming_stack_boundary_arg)
4057     {
4058       if (opts->x_ix86_incoming_stack_boundary_arg
4059 	  < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2)
4060 	  || opts->x_ix86_incoming_stack_boundary_arg > 12)
4061 	error ("-mincoming-stack-boundary=%d is not between %d and 12",
4062 	       opts->x_ix86_incoming_stack_boundary_arg,
4063 	       TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2);
4064       else
4065 	{
4066 	  ix86_user_incoming_stack_boundary
4067 	    = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4068 	  ix86_incoming_stack_boundary
4069 	    = ix86_user_incoming_stack_boundary;
4070 	}
4071     }
4072 
4073 #ifndef NO_PROFILE_COUNTERS
4074   if (flag_nop_mcount)
4075     error ("-mnop-mcount is not compatible with this target");
4076 #endif
4077   if (flag_nop_mcount && flag_pic)
4078     error ("-mnop-mcount is not implemented for -fPIC");
4079 
4080   /* Accept -msseregparm only if at least SSE support is enabled.  */
4081   if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4082       && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4083     error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4084 
4085   if (opts_set->x_ix86_fpmath)
4086     {
4087       if (opts->x_ix86_fpmath & FPMATH_SSE)
4088 	{
4089 	  if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4090 	    {
4091 	      warning (0, "SSE instruction set disabled, using 387 arithmetic");
4092 	      opts->x_ix86_fpmath = FPMATH_387;
4093 	    }
4094 	  else if ((opts->x_ix86_fpmath & FPMATH_387)
4095 		   && !TARGET_80387_P (opts->x_target_flags))
4096 	    {
4097 	      warning (0, "387 instruction set disabled, using SSE arithmetic");
4098 	      opts->x_ix86_fpmath = FPMATH_SSE;
4099 	    }
4100 	}
4101     }
4102   /* For all chips supporting SSE2, -mfpmath=sse performs better than
4103      -mfpmath=387.  The latter is nevertheless the default on many targets,
4104      since the extra 80-bit precision of temporaries is considered part of
4105      the ABI.  Overwrite the default at least for -ffast-math.
4106      TODO: -mfpmath=both seems to produce equally performing code with
4107      slightly smaller binaries.  It is, however, not clear whether register
4108      allocation is ready for this setting.
4109      Also, -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
4110      codegen.  We may switch to 387 with -ffast-math for size-optimized
4111      functions.  */
4112   else if (fast_math_flags_set_p (&global_options)
4113 	   && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4114     opts->x_ix86_fpmath = FPMATH_SSE;
4115   else
4116     opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4117 
4118   /* If the i387 is disabled, then do not return values in it. */
4119   if (!TARGET_80387_P (opts->x_target_flags))
4120     opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4121 
4122   /* Use an external vectorized library when vectorizing intrinsics.  */
4123   if (opts_set->x_ix86_veclibabi_type)
4124     switch (opts->x_ix86_veclibabi_type)
4125       {
4126       case ix86_veclibabi_type_svml:
4127 	ix86_veclib_handler = ix86_veclibabi_svml;
4128 	break;
4129 
4130       case ix86_veclibabi_type_acml:
4131 	ix86_veclib_handler = ix86_veclibabi_acml;
4132 	break;
4133 
4134       default:
4135 	gcc_unreachable ();
4136       }
4137 
4138   if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4139       && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4140     opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4141 
4142   /* If stack probes are required, the space used for large function
4143      arguments on the stack must also be probed, so enable
4144      -maccumulate-outgoing-args so this happens in the prologue.  */
4145   if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4146       && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4147     {
4148       if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4149 	warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4150 		 "for correctness", prefix, suffix);
4151       opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4152     }
4153 
4154   /* Stack realignment without -maccumulate-outgoing-args requires %ebp,
4155      so enable -maccumulate-outgoing-args when %ebp is fixed.  */
4156   if (fixed_regs[BP_REG]
4157       && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4158     {
4159       if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4160 	warning (0, "fixed ebp register requires %saccumulate-outgoing-args%s",
4161 		 prefix, suffix);
4162       opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4163     }
4164 
4165   /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
4166   {
4167     char *p;
4168     ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4169     p = strchr (internal_label_prefix, 'X');
4170     internal_label_prefix_len = p - internal_label_prefix;
4171     *p = '\0';
4172   }
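  /* For example (assuming a typical ELF definition of
     ASM_GENERATE_INTERNAL_LABEL that formats "LX", 0 as "*.LX0"), the block
     above leaves internal_label_prefix as "*.L" with
     internal_label_prefix_len equal to 3.  */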
4173 
4174   /* When no scheduling description is available, disable the scheduler
4175      passes so they won't slow down compilation and make x87 code slower.  */
4176   if (!TARGET_SCHEDULE)
4177     opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4178 
4179   maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4180 			 ix86_tune_cost->simultaneous_prefetches,
4181 			 opts->x_param_values,
4182 			 opts_set->x_param_values);
4183   maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4184 			 ix86_tune_cost->prefetch_block,
4185 			 opts->x_param_values,
4186 			 opts_set->x_param_values);
4187   maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4188 			 ix86_tune_cost->l1_cache_size,
4189 			 opts->x_param_values,
4190 			 opts_set->x_param_values);
4191   maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4192 			 ix86_tune_cost->l2_cache_size,
4193 			 opts->x_param_values,
4194 			 opts_set->x_param_values);
4195 
4196   /* Enable software prefetching at -O3 for CPUs where prefetching is beneficial.  */
4197   if (opts->x_flag_prefetch_loop_arrays < 0
4198       && HAVE_prefetch
4199       && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4200       && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4201     opts->x_flag_prefetch_loop_arrays = 1;
4202 
4203   /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4204      can be optimized to ap = __builtin_next_arg (0).  */
4205   if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4206     targetm.expand_builtin_va_start = NULL;
4207 
4208   if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4209     {
4210       ix86_gen_leave = gen_leave_rex64;
4211       if (Pmode == DImode)
4212 	{
4213 	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4214 	  ix86_gen_tls_local_dynamic_base_64
4215 	    = gen_tls_local_dynamic_base_64_di;
4216 	}
4217       else
4218 	{
4219 	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4220 	  ix86_gen_tls_local_dynamic_base_64
4221 	    = gen_tls_local_dynamic_base_64_si;
4222 	}
4223     }
4224   else
4225     ix86_gen_leave = gen_leave;
4226 
4227   if (Pmode == DImode)
4228     {
4229       ix86_gen_add3 = gen_adddi3;
4230       ix86_gen_sub3 = gen_subdi3;
4231       ix86_gen_sub3_carry = gen_subdi3_carry;
4232       ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4233       ix86_gen_andsp = gen_anddi3;
4234       ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4235       ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4236       ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4237       ix86_gen_monitor = gen_sse3_monitor_di;
4238       ix86_gen_monitorx = gen_monitorx_di;
4239     }
4240   else
4241     {
4242       ix86_gen_add3 = gen_addsi3;
4243       ix86_gen_sub3 = gen_subsi3;
4244       ix86_gen_sub3_carry = gen_subsi3_carry;
4245       ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4246       ix86_gen_andsp = gen_andsi3;
4247       ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4248       ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4249       ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4250       ix86_gen_monitor = gen_sse3_monitor_si;
4251       ix86_gen_monitorx = gen_monitorx_si;
4252     }
4253 
4254 #ifdef USE_IX86_CLD
4255   /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
4256   if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4257     opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4258 #endif
4259 
4260   if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4261     {
4262       if (opts->x_flag_fentry > 0)
4263         sorry ("-mfentry isn%'t supported for 32-bit in combination "
4264 	       "with -fpic");
4265       opts->x_flag_fentry = 0;
4266     }
4267   else if (TARGET_SEH)
4268     {
4269       if (opts->x_flag_fentry == 0)
4270 	sorry ("-mno-fentry isn%'t compatible with SEH");
4271       opts->x_flag_fentry = 1;
4272     }
4273   else if (opts->x_flag_fentry < 0)
4274    {
4275 #if defined(PROFILE_BEFORE_PROLOGUE)
4276      opts->x_flag_fentry = 1;
4277 #else
4278      opts->x_flag_fentry = 0;
4279 #endif
4280    }
4281 
4282   if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
4283     opts->x_target_flags |= MASK_VZEROUPPER;
4284   if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4285       && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4286     opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4287   if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4288       && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4289     opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4290   /* Enable 128-bit AVX instruction generation
4291      for the auto-vectorizer.  */
4292   if (TARGET_AVX128_OPTIMAL
4293       && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4294     opts->x_target_flags |= MASK_PREFER_AVX128;
4295 
4296   if (opts->x_ix86_recip_name)
4297     {
4298       char *p = ASTRDUP (opts->x_ix86_recip_name);
4299       char *q;
4300       unsigned int mask, i;
4301       bool invert;
4302 
4303       while ((q = strtok (p, ",")) != NULL)
4304 	{
4305 	  p = NULL;
4306 	  if (*q == '!')
4307 	    {
4308 	      invert = true;
4309 	      q++;
4310 	    }
4311 	  else
4312 	    invert = false;
4313 
4314 	  if (!strcmp (q, "default"))
4315 	    mask = RECIP_MASK_ALL;
4316 	  else
4317 	    {
4318 	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4319 		if (!strcmp (q, recip_options[i].string))
4320 		  {
4321 		    mask = recip_options[i].mask;
4322 		    break;
4323 		  }
4324 
4325 	      if (i == ARRAY_SIZE (recip_options))
4326 		{
4327 		  error ("unknown option for -mrecip=%s", q);
4328 		  invert = false;
4329 		  mask = RECIP_MASK_NONE;
4330 		}
4331 	    }
4332 
4333 	  opts->x_recip_mask_explicit |= mask;
4334 	  if (invert)
4335 	    opts->x_recip_mask &= ~mask;
4336 	  else
4337 	    opts->x_recip_mask |= mask;
4338 	}
4339     }
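  /* Example (illustrative, assuming recip_options contains a "sqrt" entry):
     -mrecip=default,!sqrt first ORs RECIP_MASK_ALL into x_recip_mask, then,
     because of the '!' prefix, clears the sqrt bits again; every mask
     touched is also recorded in x_recip_mask_explicit.  */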
4340 
4341   if (TARGET_RECIP_P (opts->x_target_flags))
4342     opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4343   else if (opts_set->x_target_flags & MASK_RECIP)
4344     opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4345 
4346   /* Default long double to 64-bit for 32-bit Bionic and to __float128
4347      for 64-bit Bionic.  */
4348   if (TARGET_HAS_BIONIC
4349       && !(opts_set->x_target_flags
4350 	   & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4351     opts->x_target_flags |= (TARGET_64BIT
4352 			     ? MASK_LONG_DOUBLE_128
4353 			     : MASK_LONG_DOUBLE_64);
4354 
4355   /* Only one of them can be active.  */
4356   gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4357 	      || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4358 
4359   /* Save the initial options in case the user uses function-specific
4360      options.  */
4361   if (main_args_p)
4362     target_option_default_node = target_option_current_node
4363       = build_target_option_node (opts);
4364 
4365   /* Handle stack protector */
4366   /* Handle the stack protector.  */
4367     opts->x_ix86_stack_protector_guard
4368       = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4369 
4370   /* Handle -mmemcpy-strategy= and -mmemset-strategy=  */
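  /* An illustrative value (assuming the documented alg:max_size:dest_align
     triplet grammar) is "rep_8byte:16:noalign,libcall:-1:noalign"; the
     strings are parsed by ix86_parse_stringop_strategy_string below.  */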
4371   if (opts->x_ix86_tune_memcpy_strategy)
4372     {
4373       char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4374       ix86_parse_stringop_strategy_string (str, false);
4375       free (str);
4376     }
4377 
4378   if (opts->x_ix86_tune_memset_strategy)
4379     {
4380       char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4381       ix86_parse_stringop_strategy_string (str, true);
4382       free (str);
4383     }
4384 }
4385 
4386 /* Implement the TARGET_OPTION_OVERRIDE hook.  */
4387 
4388 static void
4389 ix86_option_override (void)
4390 {
4391   opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4392   struct register_pass_info insert_vzeroupper_info
4393     = { pass_insert_vzeroupper, "reload",
4394 	1, PASS_POS_INSERT_AFTER
4395       };
4396 
4397   ix86_option_override_internal (true, &global_options, &global_options_set);
4398 
4400   /* This needs to be done at start up.  It's convenient to do it here.  */
4401   register_pass (&insert_vzeroupper_info);
4402 }
4403 
4404 /* Implement the TARGET_OFFLOAD_OPTIONS hook.  */
4405 static char *
4406 ix86_offload_options (void)
4407 {
4408   if (TARGET_LP64)
4409     return xstrdup ("-foffload-abi=lp64");
4410   return xstrdup ("-foffload-abi=ilp32");
4411 }
4412 
4413 /* Update register usage after having seen the compiler flags.  */
4414 
4415 static void
4416 ix86_conditional_register_usage (void)
4417 {
4418   int i, c_mask;
4419 
4420   /* For 32-bit targets, squash the REX registers.  */
4421   if (! TARGET_64BIT)
4422     {
4423       for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4424 	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4425       for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4426 	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4427       for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4428 	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4429     }
4430 
4431   /*  See the definition of CALL_USED_REGISTERS in i386.h.  */
4432   c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4433 	    : TARGET_64BIT ? (1 << 2)
4434 	    : (1 << 1));
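  /* Illustrative reading: a CALL_USED_REGISTERS initializer value of 6
     (= (1 << 2) | (1 << 1)) marks a register call-used for 64-bit SysV
     (bit 2) and 32-bit (bit 1) targets but not for the 64-bit MS ABI
     (bit 3); the loop below collapses such entries to 0 or 1.  */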
4435 
4436   CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4437 
4438   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4439     {
4440       /* Set/reset conditionally defined registers from
4441 	 CALL_USED_REGISTERS initializer.  */
4442       if (call_used_regs[i] > 1)
4443 	call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4444 
4445       /* Calculate registers of CLOBBERED_REGS register set
4446 	 as call used registers from GENERAL_REGS register set.  */
4447       if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4448 	  && call_used_regs[i])
4449 	SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4450     }
4451 
4452   /* If MMX is disabled, squash the registers.  */
4453   if (! TARGET_MMX)
4454     for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4455       if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4456 	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4457 
4458   /* If SSE is disabled, squash the registers.  */
4459   if (! TARGET_SSE)
4460     for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4461       if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4462 	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4463 
4464   /* If the FPU is disabled, squash the registers.  */
4465   if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4466     for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4467       if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4468 	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4469 
4470   /* If AVX512F is disabled, squash the registers.  */
4471   if (! TARGET_AVX512F)
4472     {
4473       for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4474 	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4475 
4476       for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4477 	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4478     }
4479 
4480   /* If MPX is disabled, squash the registers.  */
4481   if (! TARGET_MPX)
4482     for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4483       fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4484 }
4485 
4486 
4487 /* Save the current options.  */
4488 
4489 static void
4490 ix86_function_specific_save (struct cl_target_option *ptr,
4491 			     struct gcc_options *opts)
4492 {
4493   ptr->arch = ix86_arch;
4494   ptr->schedule = ix86_schedule;
4495   ptr->prefetch_sse = x86_prefetch_sse;
4496   ptr->tune = ix86_tune;
4497   ptr->branch_cost = ix86_branch_cost;
4498   ptr->tune_defaulted = ix86_tune_defaulted;
4499   ptr->arch_specified = ix86_arch_specified;
4500   ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4501   ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4502   ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4503   ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4504   ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4505   ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4506   ptr->x_ix86_abi = opts->x_ix86_abi;
4507   ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4508   ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4509   ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4510   ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4511   ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4512   ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4513   ptr->x_ix86_pmode = opts->x_ix86_pmode;
4514   ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4515   ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4516   ptr->x_ix86_regparm = opts->x_ix86_regparm;
4517   ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4518   ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4519   ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4520   ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4521   ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4522   ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4523   ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4524   ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4525   ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4526   ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4527 
4528   /* The fields are char but the variables are not; make sure the
4529      values fit in the fields.  */
4530   gcc_assert (ptr->arch == ix86_arch);
4531   gcc_assert (ptr->schedule == ix86_schedule);
4532   gcc_assert (ptr->tune == ix86_tune);
4533   gcc_assert (ptr->branch_cost == ix86_branch_cost);
4534 }
4535 
4536 /* Restore the current options.  */
4537 
4538 static void
4539 ix86_function_specific_restore (struct gcc_options *opts,
4540 				struct cl_target_option *ptr)
4541 {
4542   enum processor_type old_tune = ix86_tune;
4543   enum processor_type old_arch = ix86_arch;
4544   unsigned int ix86_arch_mask;
4545   int i;
4546 
4547   /* We don't change -fPIC.  */
4548   opts->x_flag_pic = flag_pic;
4549 
4550   ix86_arch = (enum processor_type) ptr->arch;
4551   ix86_schedule = (enum attr_cpu) ptr->schedule;
4552   ix86_tune = (enum processor_type) ptr->tune;
4553   x86_prefetch_sse = ptr->prefetch_sse;
4554   opts->x_ix86_branch_cost = ptr->branch_cost;
4555   ix86_tune_defaulted = ptr->tune_defaulted;
4556   ix86_arch_specified = ptr->arch_specified;
4557   opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4558   opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4559   opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4560   opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4561   opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4562   opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4563   opts->x_ix86_abi = ptr->x_ix86_abi;
4564   opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4565   opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4566   opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4567   opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4568   opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4569   opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4570   opts->x_ix86_pmode = ptr->x_ix86_pmode;
4571   opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4572   opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4573   opts->x_ix86_regparm = ptr->x_ix86_regparm;
4574   opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4575   opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4576   opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4577   opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4578   opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4579   opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4580   opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4581   opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4582   opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4583   opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4584   ix86_tune_cost = processor_target_table[ix86_tune].cost;
4585   /* TODO: ix86_cost should be chosen at instruction or function granularity
4586      so for cold code we use size_cost even in !optimize_size compilation.  */
4587   if (opts->x_optimize_size)
4588     ix86_cost = &ix86_size_cost;
4589   else
4590     ix86_cost = ix86_tune_cost;
4591 
4592   /* Recreate the arch feature tests if the arch changed.  */
4593   if (old_arch != ix86_arch)
4594     {
4595       ix86_arch_mask = 1u << ix86_arch;
4596       for (i = 0; i < X86_ARCH_LAST; ++i)
4597 	ix86_arch_features[i]
4598 	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4599     }
4600 
4601   /* Recreate the tune optimization tests.  */
4602   if (old_tune != ix86_tune)
4603     set_ix86_tune_features (ix86_tune, false);
4604 }
4605 
4606 /* Adjust target options after streaming them in.  This is mainly about
4607    reconciling them with global options.  */
4608 
4609 static void
4610 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
4611 {
4612   /* flag_pic is a global option, but ix86_cmodel is a target-saved option
4613      partly computed from flag_pic.  If flag_pic is on, adjust x_ix86_cmodel
4614      for PIC, or error out.  */
4615   if (flag_pic)
4616     switch (ptr->x_ix86_cmodel)
4617       {
4618       case CM_SMALL:
4619 	ptr->x_ix86_cmodel = CM_SMALL_PIC;
4620 	break;
4621 
4622       case CM_MEDIUM:
4623 	ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
4624 	break;
4625 
4626       case CM_LARGE:
4627 	ptr->x_ix86_cmodel = CM_LARGE_PIC;
4628 	break;
4629 
4630       case CM_KERNEL:
4631 	error ("code model %s does not support PIC mode", "kernel");
4632 	break;
4633 
4634       default:
4635 	break;
4636       }
4637   else
4638     switch (ptr->x_ix86_cmodel)
4639       {
4640       case CM_SMALL_PIC:
4641 	ptr->x_ix86_cmodel = CM_SMALL;
4642 	break;
4643 
4644       case CM_MEDIUM_PIC:
4645 	ptr->x_ix86_cmodel = CM_MEDIUM;
4646 	break;
4647 
4648       case CM_LARGE_PIC:
4649 	ptr->x_ix86_cmodel = CM_LARGE;
4650 	break;
4651 
4652       default:
4653 	break;
4654       }
4655 }
4656 
4657 /* Print the current options.  */
4658 
4659 static void
4660 ix86_function_specific_print (FILE *file, int indent,
4661 			      struct cl_target_option *ptr)
4662 {
4663   char *target_string
4664     = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4665 			  NULL, NULL, ptr->x_ix86_fpmath, false);
4666 
4667   gcc_assert (ptr->arch < PROCESSOR_max);
4668   fprintf (file, "%*sarch = %d (%s)\n",
4669 	   indent, "",
4670 	   ptr->arch, processor_target_table[ptr->arch].name);
4671 
4672   gcc_assert (ptr->tune < PROCESSOR_max);
4673   fprintf (file, "%*stune = %d (%s)\n",
4674 	   indent, "",
4675 	   ptr->tune, processor_target_table[ptr->tune].name);
4676 
4677   fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4678 
4679   if (target_string)
4680     {
4681       fprintf (file, "%*s%s\n", indent, "", target_string);
4682       free (target_string);
4683     }
4684 }
4685 
4686 
4687 /* Inner function to process the attribute((target(...))), take an argument and
4688    set the current options from the argument. If we have a list, recursively go
4689    over the list.  */
4690 
4691 static bool
4692 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4693 				     struct gcc_options *opts,
4694 				     struct gcc_options *opts_set,
4695 				     struct gcc_options *enum_opts_set)
4696 {
4697   char *next_optstr;
4698   bool ret = true;
4699 
4700 #define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4701 #define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4702 #define IX86_ATTR_ENUM(S,O)  { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4703 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4704 #define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }
4705 
4706   enum ix86_opt_type
4707   {
4708     ix86_opt_unknown,
4709     ix86_opt_yes,
4710     ix86_opt_no,
4711     ix86_opt_str,
4712     ix86_opt_enum,
4713     ix86_opt_isa
4714   };
4715 
4716   static const struct
4717   {
4718     const char *string;
4719     size_t len;
4720     enum ix86_opt_type type;
4721     int opt;
4722     int mask;
4723   } attrs[] = {
4724     /* isa options */
4725     IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
4726     IX86_ATTR_ISA ("abm",	OPT_mabm),
4727     IX86_ATTR_ISA ("bmi",	OPT_mbmi),
4728     IX86_ATTR_ISA ("bmi2",	OPT_mbmi2),
4729     IX86_ATTR_ISA ("lzcnt",	OPT_mlzcnt),
4730     IX86_ATTR_ISA ("tbm",	OPT_mtbm),
4731     IX86_ATTR_ISA ("aes",	OPT_maes),
4732     IX86_ATTR_ISA ("sha",	OPT_msha),
4733     IX86_ATTR_ISA ("avx",	OPT_mavx),
4734     IX86_ATTR_ISA ("avx2",	OPT_mavx2),
4735     IX86_ATTR_ISA ("avx512f",	OPT_mavx512f),
4736     IX86_ATTR_ISA ("avx512pf",	OPT_mavx512pf),
4737     IX86_ATTR_ISA ("avx512er",	OPT_mavx512er),
4738     IX86_ATTR_ISA ("avx512cd",	OPT_mavx512cd),
4739     IX86_ATTR_ISA ("avx512dq",	OPT_mavx512dq),
4740     IX86_ATTR_ISA ("avx512bw",	OPT_mavx512bw),
4741     IX86_ATTR_ISA ("avx512vl",	OPT_mavx512vl),
4742     IX86_ATTR_ISA ("mmx",	OPT_mmmx),
4743     IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
4744     IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
4745     IX86_ATTR_ISA ("sse",	OPT_msse),
4746     IX86_ATTR_ISA ("sse2",	OPT_msse2),
4747     IX86_ATTR_ISA ("sse3",	OPT_msse3),
4748     IX86_ATTR_ISA ("sse4",	OPT_msse4),
4749     IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
4750     IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
4751     IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
4752     IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
4753     IX86_ATTR_ISA ("fma4",	OPT_mfma4),
4754     IX86_ATTR_ISA ("fma",	OPT_mfma),
4755     IX86_ATTR_ISA ("xop",	OPT_mxop),
4756     IX86_ATTR_ISA ("lwp",	OPT_mlwp),
4757     IX86_ATTR_ISA ("fsgsbase",	OPT_mfsgsbase),
4758     IX86_ATTR_ISA ("rdrnd",	OPT_mrdrnd),
4759     IX86_ATTR_ISA ("f16c",	OPT_mf16c),
4760     IX86_ATTR_ISA ("rtm",	OPT_mrtm),
4761     IX86_ATTR_ISA ("hle",	OPT_mhle),
4762     IX86_ATTR_ISA ("prfchw",	OPT_mprfchw),
4763     IX86_ATTR_ISA ("rdseed",	OPT_mrdseed),
4764     IX86_ATTR_ISA ("adx",	OPT_madx),
4765     IX86_ATTR_ISA ("fxsr",	OPT_mfxsr),
4766     IX86_ATTR_ISA ("xsave",	OPT_mxsave),
4767     IX86_ATTR_ISA ("xsaveopt",	OPT_mxsaveopt),
4768     IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4769     IX86_ATTR_ISA ("clflushopt",	OPT_mclflushopt),
4770     IX86_ATTR_ISA ("xsavec",	OPT_mxsavec),
4771     IX86_ATTR_ISA ("xsaves",	OPT_mxsaves),
4772     IX86_ATTR_ISA ("avx512vbmi",	OPT_mavx512vbmi),
4773     IX86_ATTR_ISA ("avx512ifma",	OPT_mavx512ifma),
4774     IX86_ATTR_ISA ("clwb",	OPT_mclwb),
4775     IX86_ATTR_ISA ("pcommit",	OPT_mpcommit),
4776     IX86_ATTR_ISA ("mwaitx",	OPT_mmwaitx),
4777 
4778     /* enum options */
4779     IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),
4780 
4781     /* string options */
4782     IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
4783     IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),
4784 
4785     /* flag options */
4786     IX86_ATTR_YES ("cld",
4787 		   OPT_mcld,
4788 		   MASK_CLD),
4789 
4790     IX86_ATTR_NO ("fancy-math-387",
4791 		  OPT_mfancy_math_387,
4792 		  MASK_NO_FANCY_MATH_387),
4793 
4794     IX86_ATTR_YES ("ieee-fp",
4795 		   OPT_mieee_fp,
4796 		   MASK_IEEE_FP),
4797 
4798     IX86_ATTR_YES ("inline-all-stringops",
4799 		   OPT_minline_all_stringops,
4800 		   MASK_INLINE_ALL_STRINGOPS),
4801 
4802     IX86_ATTR_YES ("inline-stringops-dynamically",
4803 		   OPT_minline_stringops_dynamically,
4804 		   MASK_INLINE_STRINGOPS_DYNAMICALLY),
4805 
4806     IX86_ATTR_NO ("align-stringops",
4807 		  OPT_mno_align_stringops,
4808 		  MASK_NO_ALIGN_STRINGOPS),
4809 
4810     IX86_ATTR_YES ("recip",
4811 		   OPT_mrecip,
4812 		   MASK_RECIP),
4813 
4814   };
4815 
4816   /* If this is a list, recurse to get the options.  */
4817   if (TREE_CODE (args) == TREE_LIST)
4818     {
4819       bool ret = true;
4820 
4821       for (; args; args = TREE_CHAIN (args))
4822 	if (TREE_VALUE (args)
4823 	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4824 						     p_strings, opts, opts_set,
4825 						     enum_opts_set))
4826 	  ret = false;
4827 
4828       return ret;
4829     }
4830 
4831   else if (TREE_CODE (args) != STRING_CST)
4832     {
4833       error ("attribute %<target%> argument not a string");
4834       return false;
4835     }
4836 
4837   /* Handle multiple arguments separated by commas.  */
4838   next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4839 
4840   while (next_optstr && *next_optstr != '\0')
4841     {
4842       char *p = next_optstr;
4843       char *orig_p = p;
4844       char *comma = strchr (next_optstr, ',');
4845       const char *opt_string;
4846       size_t len, opt_len;
4847       int opt;
4848       bool opt_set_p;
4849       char ch;
4850       unsigned i;
4851       enum ix86_opt_type type = ix86_opt_unknown;
4852       int mask = 0;
4853 
4854       if (comma)
4855 	{
4856 	  *comma = '\0';
4857 	  len = comma - next_optstr;
4858 	  next_optstr = comma + 1;
4859 	}
4860       else
4861 	{
4862 	  len = strlen (p);
4863 	  next_optstr = NULL;
4864 	}
4865 
4866       /* Recognize no-xxx.  */
4867       if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4868 	{
4869 	  opt_set_p = false;
4870 	  p += 3;
4871 	  len -= 3;
4872 	}
4873       else
4874 	opt_set_p = true;
4875 
4876       /* Find the option.  */
4877       ch = *p;
4878       opt = N_OPTS;
4879       for (i = 0; i < ARRAY_SIZE (attrs); i++)
4880 	{
4881 	  type = attrs[i].type;
4882 	  opt_len = attrs[i].len;
4883 	  if (ch == attrs[i].string[0]
4884 	      && ((type != ix86_opt_str && type != ix86_opt_enum)
4885 		  ? len == opt_len
4886 		  : len > opt_len)
4887 	      && memcmp (p, attrs[i].string, opt_len) == 0)
4888 	    {
4889 	      opt = attrs[i].opt;
4890 	      mask = attrs[i].mask;
4891 	      opt_string = attrs[i].string;
4892 	      break;
4893 	    }
4894 	}
4895 
4896       /* Process the option.  */
4897       if (opt == N_OPTS)
4898 	{
4899 	  error ("attribute(target(\"%s\")) is unknown", orig_p);
4900 	  ret = false;
4901 	}
4902 
4903       else if (type == ix86_opt_isa)
4904 	{
4905 	  struct cl_decoded_option decoded;
4906 
4907 	  generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4908 	  ix86_handle_option (opts, opts_set,
4909 			      &decoded, input_location);
4910 	}
4911 
4912       else if (type == ix86_opt_yes || type == ix86_opt_no)
4913 	{
4914 	  if (type == ix86_opt_no)
4915 	    opt_set_p = !opt_set_p;
4916 
4917 	  if (opt_set_p)
4918 	    opts->x_target_flags |= mask;
4919 	  else
4920 	    opts->x_target_flags &= ~mask;
4921 	}
4922 
4923       else if (type == ix86_opt_str)
4924 	{
4925 	  if (p_strings[opt])
4926 	    {
4927 	      error ("option(\"%s\") was already specified", opt_string);
4928 	      ret = false;
4929 	    }
4930 	  else
4931 	    p_strings[opt] = xstrdup (p + opt_len);
4932 	}
4933 
4934       else if (type == ix86_opt_enum)
4935 	{
4936 	  bool arg_ok;
4937 	  int value;
4938 
4939 	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4940 	  if (arg_ok)
4941 	    set_option (opts, enum_opts_set, opt, value,
4942 			p + opt_len, DK_UNSPECIFIED, input_location,
4943 			global_dc);
4944 	  else
4945 	    {
4946 	      error ("attribute(target(\"%s\")) is unknown", orig_p);
4947 	      ret = false;
4948 	    }
4949 	}
4950 
4951       else
4952 	gcc_unreachable ();
4953     }
4954 
4955   return ret;
4956 }
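/* As an illustration, an attribute string such as
   __attribute__((target ("sse4.2,no-avx,arch=core2"))) is split on commas
   above: "sse4.2" is handled as an isa option, "no-avx" clears the AVX isa
   flag via the "no-" prefix, and "arch=core2" is saved as a string option
   for later processing.  */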
4957 
4958 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */
4959 
4960 tree
4961 ix86_valid_target_attribute_tree (tree args,
4962 				  struct gcc_options *opts,
4963 				  struct gcc_options *opts_set)
4964 {
4965   const char *orig_arch_string = opts->x_ix86_arch_string;
4966   const char *orig_tune_string = opts->x_ix86_tune_string;
4967   enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4968   int orig_tune_defaulted = ix86_tune_defaulted;
4969   int orig_arch_specified = ix86_arch_specified;
4970   char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4971   tree t = NULL_TREE;
4972   int i;
4973   struct cl_target_option *def
4974     = TREE_TARGET_OPTION (target_option_default_node);
4975   struct gcc_options enum_opts_set;
4976 
4977   memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4978 
4979   /* Process each of the options on the chain.  */
4980   if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4981 					     opts_set, &enum_opts_set))
4982     return error_mark_node;
4983 
4984   /* If the changed options are different from the default, rerun
4985      ix86_option_override_internal, and then save the options away.
4986      The string options are are attribute options, and will be undone
4987      The string options are attribute options, and will be undone
4988   if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4989       || opts->x_target_flags != def->x_target_flags
4990       || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4991       || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4992       || enum_opts_set.x_ix86_fpmath)
4993     {
4994       /* If we are using the default tune= or arch=, undo the string assigned,
4995 	 and use the default.  */
4996       if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4997 	opts->x_ix86_arch_string
4998 	  = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]);
4999       else if (!orig_arch_specified)
5000 	opts->x_ix86_arch_string = NULL;
5001 
5002       if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
5003 	opts->x_ix86_tune_string
5004 	  = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
5005       else if (orig_tune_defaulted)
5006 	opts->x_ix86_tune_string = NULL;
5007 
5008       /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
5009       if (enum_opts_set.x_ix86_fpmath)
5010 	opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5011       else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
5012 	       && TARGET_SSE_P (opts->x_ix86_isa_flags))
5013 	{
5014 	  opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
5015 	  opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5016 	}
5017 
5018       /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
5019       ix86_option_override_internal (false, opts, opts_set);
5020 
5021       /* Add any builtin functions with the new isa if any.  */
5022       ix86_add_new_builtins (opts->x_ix86_isa_flags);
5023 
5024       /* Save the current options unless we are validating options for
5025 	 #pragma.  */
5026       t = build_target_option_node (opts);
5027 
5028       opts->x_ix86_arch_string = orig_arch_string;
5029       opts->x_ix86_tune_string = orig_tune_string;
5030       opts_set->x_ix86_fpmath = orig_fpmath_set;
5031 
5032       /* Free up memory allocated to hold the strings.  */
5033       for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5034 	free (option_strings[i]);
5035     }
5036 
5037   return t;
5038 }
5039 
5040 /* Hook to validate attribute((target("string"))).  */
5041 
5042 static bool
5043 ix86_valid_target_attribute_p (tree fndecl,
5044 			       tree ARG_UNUSED (name),
5045 			       tree args,
5046 			       int ARG_UNUSED (flags))
5047 {
5048   struct gcc_options func_options;
5049   tree new_target, new_optimize;
5050   bool ret = true;
5051 
5052   /* attribute((target("default"))) does nothing, beyond
5053      affecting multi-versioning.  */
5054   if (TREE_VALUE (args)
5055       && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5056       && TREE_CHAIN (args) == NULL_TREE
5057       && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5058     return true;
5059 
5060   tree old_optimize = build_optimization_node (&global_options);
5061 
5062   /* Get the optimization options of the current function.  */
5063   tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5064 
5065   if (!func_optimize)
5066     func_optimize = old_optimize;
5067 
5068   /* Init func_options.  */
5069   memset (&func_options, 0, sizeof (func_options));
5070   init_options_struct (&func_options, NULL);
5071   lang_hooks.init_options_struct (&func_options);
5072 
5073   cl_optimization_restore (&func_options,
5074 			   TREE_OPTIMIZATION (func_optimize));
5075 
5076   /* Initialize func_options to the default before its target options can
5077      be set.  */
5078   cl_target_option_restore (&func_options,
5079 			    TREE_TARGET_OPTION (target_option_default_node));
5080 
5081   new_target = ix86_valid_target_attribute_tree (args, &func_options,
5082 						 &global_options_set);
5083 
5084   new_optimize = build_optimization_node (&func_options);
5085 
5086   if (new_target == error_mark_node)
5087     ret = false;
5088 
5089   else if (fndecl && new_target)
5090     {
5091       DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5092 
5093       if (old_optimize != new_optimize)
5094 	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5095     }
5096 
5097   return ret;
5098 }
5099 
5100 
5101 /* Hook to determine if one function can safely inline another.  */
5102 
5103 static bool
5104 ix86_can_inline_p (tree caller, tree callee)
5105 {
5106   bool ret = false;
5107   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5108   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5109 
5110   /* If callee has no option attributes, then it is ok to inline.  */
5111   if (!callee_tree)
5112     ret = true;
5113 
5114   /* If caller has no option attributes, but callee does, then it is not ok to
5115      inline.  */
5116   else if (!caller_tree)
5117     ret = false;
5118 
5119   else
5120     {
5121       struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5122       struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5123 
5124       /* The callee's ISA options should be a subset of the caller's, i.e.
5125 	 an SSE4 function can inline an SSE2 function, but an SSE2 function
5126 	 can't inline an SSE4 function.  */
5127       if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5128 	  != callee_opts->x_ix86_isa_flags)
5129 	ret = false;
5130 
5131       /* See if we have the same non-isa options.  */
5132       else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5133 	ret = false;
5134 
5135       /* See if arch, tune, etc. are the same.  */
5136       else if (caller_opts->arch != callee_opts->arch)
5137 	ret = false;
5138 
5139       else if (caller_opts->tune != callee_opts->tune)
5140 	ret = false;
5141 
5142       else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5143 	ret = false;
5144 
5145       else if (caller_opts->branch_cost != callee_opts->branch_cost)
5146 	ret = false;
5147 
5148       else
5149 	ret = true;
5150     }
5151 
5152   return ret;
5153 }
5154 
5155 
5156 /* Remember the last target of ix86_set_current_function.  */
5157 static GTY(()) tree ix86_previous_fndecl;
5158 
5159 /* Set target globals to the default (or current #pragma GCC target
5160    if active).  Invalidate ix86_previous_fndecl cache.  */
5161 
5162 void
5163 ix86_reset_previous_fndecl (void)
5164 {
5165   tree new_tree = target_option_current_node;
5166   cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5167   if (TREE_TARGET_GLOBALS (new_tree))
5168     restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5169   else if (new_tree == target_option_default_node)
5170     restore_target_globals (&default_target_globals);
5171   else
5172     TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5173   ix86_previous_fndecl = NULL_TREE;
5174 }
5175 
5176 /* Establish appropriate back-end context for processing the function
5177    FNDECL.  The argument might be NULL to indicate processing at top
5178    level, outside of any function scope.  */
5179 static void
5180 ix86_set_current_function (tree fndecl)
5181 {
5182   /* Only change the context if the function changes.  This hook is called
5183      several times in the course of compiling a function, and we don't want to
5184      slow things down too much or call target_reinit when it isn't safe.  */
5185   if (fndecl == ix86_previous_fndecl)
5186     return;
5187 
5188   tree old_tree;
5189   if (ix86_previous_fndecl == NULL_TREE)
5190     old_tree = target_option_current_node;
5191   else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5192     old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5193   else
5194     old_tree = target_option_default_node;
5195 
5196   if (fndecl == NULL_TREE)
5197     {
5198       if (old_tree != target_option_current_node)
5199 	ix86_reset_previous_fndecl ();
5200       return;
5201     }
5202 
5203   tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5204   if (new_tree == NULL_TREE)
5205     new_tree = target_option_default_node;
5206 
5207   if (old_tree != new_tree)
5208     {
5209       cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5210       if (TREE_TARGET_GLOBALS (new_tree))
5211 	restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5212       else if (new_tree == target_option_default_node)
5213 	restore_target_globals (&default_target_globals);
5214       else
5215 	TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5216     }
5217   ix86_previous_fndecl = fndecl;
5218 
5219   /* The 64-bit MS and SYSV ABIs have different sets of call-used registers
5220      (e.g. %rsi is call-used only under SYSV).  Avoid expensive
5221      re-initialization of init_regs each time we switch function context.  */
5222   if (TARGET_64BIT
5223       && (call_used_regs[SI_REG]
5224 	  == (cfun->machine->call_abi == MS_ABI)))
5225     reinit_regs ();
5226 }
5227 
5228 
5229 /* Return true if this goes in large data/bss.  */
5230 
5231 static bool
5232 ix86_in_large_data_p (tree exp)
5233 {
5234   if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5235     return false;
5236 
5237   /* Functions are never large data.  */
5238   if (TREE_CODE (exp) == FUNCTION_DECL)
5239     return false;
5240 
5241   /* Automatic variables are never large data.  */
5242   if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5243     return false;
5244 
5245   if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5246     {
5247       const char *section = DECL_SECTION_NAME (exp);
5248       if (strcmp (section, ".ldata") == 0
5249 	  || strcmp (section, ".lbss") == 0)
5250 	return true;
5251       return false;
5252     }
5253   else
5254     {
5255       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5256 
5257       /* If this is an incomplete type with size 0, then we can't put it
5258 	 in data because it might be too big when completed.  Also,
5259 	 int_size_in_bytes returns -1 if the size can vary or is larger than
5260 	 an integer, in which case it is also safer to assume that it goes in
5261 	 large data.  */
5262       if (size <= 0 || size > ix86_section_threshold)
5263 	return true;
5264     }
5265 
5266   return false;
5267 }
5268 
5269 /* Switch to the appropriate section for output of DECL.
5270    DECL is either a `VAR_DECL' node or a constant of some sort.
5271    RELOC indicates whether forming the initial value of DECL requires
5272    link-time relocations.  */
5273 
5274 ATTRIBUTE_UNUSED static section *
5275 x86_64_elf_select_section (tree decl, int reloc,
5276 			   unsigned HOST_WIDE_INT align)
5277 {
5278   if (ix86_in_large_data_p (decl))
5279     {
5280       const char *sname = NULL;
5281       unsigned int flags = SECTION_WRITE;
5282       switch (categorize_decl_for_section (decl, reloc))
5283 	{
5284 	case SECCAT_DATA:
5285 	  sname = ".ldata";
5286 	  break;
5287 	case SECCAT_DATA_REL:
5288 	  sname = ".ldata.rel";
5289 	  break;
5290 	case SECCAT_DATA_REL_LOCAL:
5291 	  sname = ".ldata.rel.local";
5292 	  break;
5293 	case SECCAT_DATA_REL_RO:
5294 	  sname = ".ldata.rel.ro";
5295 	  break;
5296 	case SECCAT_DATA_REL_RO_LOCAL:
5297 	  sname = ".ldata.rel.ro.local";
5298 	  break;
5299 	case SECCAT_BSS:
5300 	  sname = ".lbss";
5301 	  flags |= SECTION_BSS;
5302 	  break;
5303 	case SECCAT_RODATA:
5304 	case SECCAT_RODATA_MERGE_STR:
5305 	case SECCAT_RODATA_MERGE_STR_INIT:
5306 	case SECCAT_RODATA_MERGE_CONST:
5307 	  sname = ".lrodata";
5308 	  flags = 0;
5309 	  break;
5310 	case SECCAT_SRODATA:
5311 	case SECCAT_SDATA:
5312 	case SECCAT_SBSS:
5313 	  gcc_unreachable ();
5314 	case SECCAT_TEXT:
5315 	case SECCAT_TDATA:
5316 	case SECCAT_TBSS:
5317 	  /* We don't split these for the medium model.  Place them into
5318 	     default sections and hope for the best.  */
5319 	  break;
5320 	}
5321       if (sname)
5322 	{
5323 	  /* We might get called with string constants, but get_named_section
5324 	     doesn't like them as they are not DECLs.  Also, we need to set
5325 	     flags in that case.  */
5326 	  if (!DECL_P (decl))
5327 	    return get_section (sname, flags, NULL);
5328 	  return get_named_section (decl, sname, reloc);
5329 	}
5330     }
5331   return default_elf_select_section (decl, reloc, align);
5332 }
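/* For instance (illustrative), a writable large-model variable whose
   initializer needs dynamic relocations is placed in ".ldata.rel" above,
   while read-only large data without relocations goes to ".lrodata".  */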
5333 
5334 /* Select a set of attributes for section NAME based on the properties
5335    of DECL and whether or not RELOC indicates that DECL's initializer
5336    might contain runtime relocations.  */
5337 
5338 static unsigned int ATTRIBUTE_UNUSED
5339 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5340 {
5341   unsigned int flags = default_section_type_flags (decl, name, reloc);
5342 
5343   if (decl == NULL_TREE
5344       && (strcmp (name, ".ldata.rel.ro") == 0
5345 	  || strcmp (name, ".ldata.rel.ro.local") == 0))
5346     flags |= SECTION_RELRO;
5347 
5348   if (strcmp (name, ".lbss") == 0
5349       || strncmp (name, ".lbss.", 6) == 0
5350       || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5351     flags |= SECTION_BSS;
5352 
5353   return flags;
5354 }
5355 
5356 /* Build up a unique section name, expressed as a
5357    STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5358    RELOC indicates whether the initial value of DECL requires
5359    link-time relocations.  */
5360 
5361 static void ATTRIBUTE_UNUSED
5362 x86_64_elf_unique_section (tree decl, int reloc)
5363 {
5364   if (ix86_in_large_data_p (decl))
5365     {
5366       const char *prefix = NULL;
5367       /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
5368       bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5369 
5370       switch (categorize_decl_for_section (decl, reloc))
5371 	{
5372 	case SECCAT_DATA:
5373 	case SECCAT_DATA_REL:
5374 	case SECCAT_DATA_REL_LOCAL:
5375 	case SECCAT_DATA_REL_RO:
5376 	case SECCAT_DATA_REL_RO_LOCAL:
5377           prefix = one_only ? ".ld" : ".ldata";
5378 	  break;
5379 	case SECCAT_BSS:
5380           prefix = one_only ? ".lb" : ".lbss";
5381 	  break;
5382 	case SECCAT_RODATA:
5383 	case SECCAT_RODATA_MERGE_STR:
5384 	case SECCAT_RODATA_MERGE_STR_INIT:
5385 	case SECCAT_RODATA_MERGE_CONST:
5386           prefix = one_only ? ".lr" : ".lrodata";
5387 	  break;
5388 	case SECCAT_SRODATA:
5389 	case SECCAT_SDATA:
5390 	case SECCAT_SBSS:
5391 	  gcc_unreachable ();
5392 	case SECCAT_TEXT:
5393 	case SECCAT_TDATA:
5394 	case SECCAT_TBSS:
5395 	  /* We don't split these for the medium model.  Place them into
5396 	     default sections and hope for the best.  */
5397 	  break;
5398 	}
5399       if (prefix)
5400 	{
5401 	  const char *name, *linkonce;
5402 	  char *string;
5403 
5404 	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5405 	  name = targetm.strip_name_encoding (name);
5406 
5407 	  /* If we're using one_only, then there needs to be a .gnu.linkonce
5408      	     prefix to the section name.  */
5409 	  linkonce = one_only ? ".gnu.linkonce" : "";
5410 
5411 	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5412 
5413 	  set_decl_section_name (decl, string);
5414 	  return;
5415 	}
5416     }
5417   default_unique_section (decl, reloc);
5418 }
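/* For example (illustrative), a large-model BSS variable "foo" receives the
   section name ".lbss.foo", or ".gnu.linkonce.lb.foo" when one_only
   linkonce sections are needed.  */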
5419 
5420 #ifdef COMMON_ASM_OP
5421 /* This says how to output assembler code to declare an
5422    uninitialized external linkage data object.
5423 
5424    For medium model x86-64 we need to use the .largecomm directive for
5425    large objects.  */
5426 void
5427 x86_elf_aligned_common (FILE *file,
5428 			const char *name, unsigned HOST_WIDE_INT size,
5429 			int align)
5430 {
5431   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5432       && size > (unsigned int)ix86_section_threshold)
5433     fputs ("\t.largecomm\t", file);
5434   else
5435     fputs (COMMON_ASM_OP, file);
5436   assemble_name (file, name);
5437   fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5438 	   size, align / BITS_PER_UNIT);
5439 }
5440 #endif
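/* For instance (illustrative), a 100000-byte object "foo" with 256-bit
   alignment under -mcmodel=medium and the default section threshold is
   emitted as:

	.largecomm	foo,100000,32

   while smaller objects go through COMMON_ASM_OP instead.  */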
5441 
5442 /* Utility function for targets to use in implementing
5443    ASM_OUTPUT_ALIGNED_BSS.  */
5444 
5445 void
5446 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5447 		       	unsigned HOST_WIDE_INT size, int align)
5448 {
5449   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5450       && size > (unsigned int)ix86_section_threshold)
5451     switch_to_section (get_named_section (decl, ".lbss", 0));
5452   else
5453     switch_to_section (bss_section);
5454   ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5455 #ifdef ASM_DECLARE_OBJECT_NAME
5456   last_assemble_variable_decl = decl;
5457   ASM_DECLARE_OBJECT_NAME (file, name, decl);
5458 #else
5459   /* The standard thing is to just output a label for the object.  */
5460   ASM_OUTPUT_LABEL (file, name);
5461 #endif /* ASM_DECLARE_OBJECT_NAME */
5462   ASM_OUTPUT_SKIP (file, size ? size : 1);
5463 }
5464 
5465 /* Decide whether we must probe the stack before any space allocation
5466    on this target.  It's essentially TARGET_STACK_PROBE except when
5467    -fstack-check causes the stack to be already probed differently.  */
5468 
5469 bool
5470 ix86_target_stack_probe (void)
5471 {
5472   /* Do not probe the stack twice if static stack checking is enabled.  */
5473   if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5474     return false;
5475 
5476   return TARGET_STACK_PROBE;
5477 }
5478 
5479 /* Decide whether we can make a sibling call to a function.  DECL is the
5480    declaration of the function being targeted by the call and EXP is the
5481    CALL_EXPR representing the call.  */
5482 
5483 static bool
5484 ix86_function_ok_for_sibcall (tree decl, tree exp)
5485 {
5486   tree type, decl_or_type;
5487   rtx a, b;
5488 
5489   /* If we are generating position-independent code, we cannot sibcall
5490      optimize any indirect call, or a direct call to a global function,
5491      as the PLT requires %ebx be live. (Darwin does not have a PLT.)  */
5492   if (!TARGET_MACHO
5493       && !TARGET_64BIT
5494       && flag_pic
5495       && (!decl || !targetm.binds_local_p (decl)))
5496     return false;
5497 
5498   /* If we need to align the outgoing stack, then sibcalling would
5499      unalign the stack, which may break the called function.  */
5500   if (ix86_minimum_incoming_stack_boundary (true)
5501       < PREFERRED_STACK_BOUNDARY)
5502     return false;
5503 
5504   if (decl)
5505     {
5506       decl_or_type = decl;
5507       type = TREE_TYPE (decl);
5508     }
5509   else
5510     {
5511       /* We're looking at the CALL_EXPR, we need the type of the function.  */
5512       type = CALL_EXPR_FN (exp);		/* pointer expression */
5513       type = TREE_TYPE (type);			/* pointer type */
5514       type = TREE_TYPE (type);			/* function type */
5515       decl_or_type = type;
5516     }
5517 
5518   /* Check that the return value locations are the same.  For example,
5519      if we are returning floats on the 80387 register stack, we cannot
5520      make a sibcall from a function that doesn't return a float to a
5521      function that does or, conversely, from a function that does return
5522      a float to a function that doesn't; the necessary stack adjustment
5523      would not be executed.  This is also the place we notice
5524      differences in the return value ABI.  Note that it is ok for one
5525      of the functions to have void return type as long as the return
5526      value of the other is passed in a register.  */
5527   a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5528   b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5529 			   cfun->decl, false);
5530   if (STACK_REG_P (a) || STACK_REG_P (b))
5531     {
5532       if (!rtx_equal_p (a, b))
5533 	return false;
5534     }
5535   else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5536     ;
5537   else if (!rtx_equal_p (a, b))
5538     return false;
5539 
5540   if (TARGET_64BIT)
5541     {
5542       /* The SYSV ABI has more call-clobbered registers;
5543 	 disallow sibcalls from MS to SYSV.  */
5544       if (cfun->machine->call_abi == MS_ABI
5545 	  && ix86_function_type_abi (type) == SYSV_ABI)
5546 	return false;
5547     }
5548   else
5549     {
5550       /* If this call is indirect, we'll need to be able to use a
5551 	 call-clobbered register for the address of the target function.
5552 	 Make sure that all such registers are not used for passing
5553 	 parameters.  Note that DLLIMPORT functions are indirect.  */
5554       if (!decl
5555 	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5556 	{
5557 	  if (ix86_function_regparm (type, NULL) >= 3)
5558 	    {
5559 	      /* ??? Need to count the actual number of registers to be used,
5560 		 not the possible number of registers.  Fix later.  */
5561 	      return false;
5562 	    }
5563 	}
5564     }
5565 
5566   /* Otherwise okay.  That also includes certain types of indirect calls.  */
5567   return true;
5568 }
5569 
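/* An illustrative rejection (hypothetical user code): with -m32 -fPIC,
   an indirect call cannot become a sibcall because the PLT needs %ebx
   to remain live past the jump:

     extern int (*fp) (int);
     int wrap (int x) { return fp (x); }

   The same code compiled with -m64 is eligible for a sibling call.  */
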
5570 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5571    and "sseregparm" calling convention attributes;
5572    arguments as in struct attribute_spec.handler.  */
5573 
5574 static tree
5575 ix86_handle_cconv_attribute (tree *node, tree name,
5576 				   tree args,
5577 				   int,
5578 				   bool *no_add_attrs)
5579 {
5580   if (TREE_CODE (*node) != FUNCTION_TYPE
5581       && TREE_CODE (*node) != METHOD_TYPE
5582       && TREE_CODE (*node) != FIELD_DECL
5583       && TREE_CODE (*node) != TYPE_DECL)
5584     {
5585       warning (OPT_Wattributes, "%qE attribute only applies to functions",
5586 	       name);
5587       *no_add_attrs = true;
5588       return NULL_TREE;
5589     }
5590 
5591   /* Can combine regparm with all attributes but fastcall and thiscall.  */
5592   if (is_attribute_p ("regparm", name))
5593     {
5594       tree cst;
5595 
5596       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5597         {
5598 	  error ("fastcall and regparm attributes are not compatible");
5599 	}
5600 
5601       if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5602 	{
5603 	  error ("regparm and thiscall attributes are not compatible");
5604 	}
5605 
5606       cst = TREE_VALUE (args);
5607       if (TREE_CODE (cst) != INTEGER_CST)
5608 	{
5609 	  warning (OPT_Wattributes,
5610 		   "%qE attribute requires an integer constant argument",
5611 		   name);
5612 	  *no_add_attrs = true;
5613 	}
5614       else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5615 	{
5616 	  warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5617 		   name, REGPARM_MAX);
5618 	  *no_add_attrs = true;
5619 	}
5620 
5621       return NULL_TREE;
5622     }
5623 
5624   if (TARGET_64BIT)
5625     {
5626       /* Do not warn when emulating the MS ABI.  */
5627       if ((TREE_CODE (*node) != FUNCTION_TYPE
5628 	   && TREE_CODE (*node) != METHOD_TYPE)
5629 	  || ix86_function_type_abi (*node) != MS_ABI)
5630 	warning (OPT_Wattributes, "%qE attribute ignored",
5631 	         name);
5632       *no_add_attrs = true;
5633       return NULL_TREE;
5634     }
5635 
5636   /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
5637   if (is_attribute_p ("fastcall", name))
5638     {
5639       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5640         {
5641 	  error ("fastcall and cdecl attributes are not compatible");
5642 	}
5643       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5644         {
5645 	  error ("fastcall and stdcall attributes are not compatible");
5646 	}
5647       if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5648         {
5649 	  error ("fastcall and regparm attributes are not compatible");
5650 	}
5651       if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5652 	{
5653 	  error ("fastcall and thiscall attributes are not compatible");
5654 	}
5655     }
5656 
5657   /* Can combine stdcall with fastcall (redundant), regparm and
5658      sseregparm.  */
5659   else if (is_attribute_p ("stdcall", name))
5660     {
5661       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5662         {
5663 	  error ("stdcall and cdecl attributes are not compatible");
5664 	}
5665       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5666         {
5667 	  error ("stdcall and fastcall attributes are not compatible");
5668 	}
5669       if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5670 	{
5671 	  error ("stdcall and thiscall attributes are not compatible");
5672 	}
5673     }
5674 
5675   /* Can combine cdecl with regparm and sseregparm.  */
5676   else if (is_attribute_p ("cdecl", name))
5677     {
5678       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5679         {
5680 	  error ("stdcall and cdecl attributes are not compatible");
5681 	}
5682       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5683         {
5684 	  error ("fastcall and cdecl attributes are not compatible");
5685 	}
5686       if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5687 	{
5688 	  error ("cdecl and thiscall attributes are not compatible");
5689 	}
5690     }
5691   else if (is_attribute_p ("thiscall", name))
5692     {
5693       if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5694 	warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5695 	         name);
5696       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5697 	{
5698 	  error ("stdcall and thiscall attributes are not compatible");
5699 	}
5700       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5701 	{
5702 	  error ("fastcall and thiscall attributes are not compatible");
5703 	}
5704       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5705 	{
5706 	  error ("cdecl and thiscall attributes are not compatible");
5707 	}
5708     }
5709 
5710   /* Can combine sseregparm with all attributes.  */
5711 
5712   return NULL_TREE;
5713 }
5714 
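/* As an illustration (hypothetical declaration, not part of GCC), the
   handler above rejects conflicting conventions:

     void f (int) __attribute__ ((fastcall, stdcall));

   draws an error that the two attributes are not compatible, and on a
   64-bit non-MS target either attribute alone is ignored with a
   -Wattributes warning.  */
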
5715 /* The transactional memory builtins are implicitly regparm or fastcall
5716    depending on the ABI.  Override the generic do-nothing attribute that
5717    these builtins were declared with, and replace it with one of the two
5718    attributes that we expect elsewhere.  */
5719 
5720 static tree
5721 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5722 				  int flags, bool *no_add_attrs)
5723 {
5724   tree alt;
5725 
5726   /* In no case do we want to add the placeholder attribute.  */
5727   *no_add_attrs = true;
5728 
5729   /* The 64-bit ABI is unchanged for transactional memory.  */
5730   if (TARGET_64BIT)
5731     return NULL_TREE;
5732 
5733   /* ??? Is there a better way to validate 32-bit windows?  We have
5734      cfun->machine->call_abi, but that seems to be set only for 64-bit.  */
5735   if (CHECK_STACK_LIMIT > 0)
5736     alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5737   else
5738     {
5739       alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5740       alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5741     }
5742   decl_attributes (node, alt, flags);
5743 
5744   return NULL_TREE;
5745 }
5746 
5747 /* This function determines from TYPE the calling-convention.  */
5748 
5749 unsigned int
5750 ix86_get_callcvt (const_tree type)
5751 {
5752   unsigned int ret = 0;
5753   bool is_stdarg;
5754   tree attrs;
5755 
5756   if (TARGET_64BIT)
5757     return IX86_CALLCVT_CDECL;
5758 
5759   attrs = TYPE_ATTRIBUTES (type);
5760   if (attrs != NULL_TREE)
5761     {
5762       if (lookup_attribute ("cdecl", attrs))
5763 	ret |= IX86_CALLCVT_CDECL;
5764       else if (lookup_attribute ("stdcall", attrs))
5765 	ret |= IX86_CALLCVT_STDCALL;
5766       else if (lookup_attribute ("fastcall", attrs))
5767 	ret |= IX86_CALLCVT_FASTCALL;
5768       else if (lookup_attribute ("thiscall", attrs))
5769 	ret |= IX86_CALLCVT_THISCALL;
5770 
5771       /* Regparm isn't allowed for thiscall and fastcall.  */
5772       if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5773 	{
5774 	  if (lookup_attribute ("regparm", attrs))
5775 	    ret |= IX86_CALLCVT_REGPARM;
5776 	  if (lookup_attribute ("sseregparm", attrs))
5777 	    ret |= IX86_CALLCVT_SSEREGPARM;
5778 	}
5779 
5780       if (IX86_BASE_CALLCVT(ret) != 0)
5781 	return ret;
5782     }
5783 
5784   is_stdarg = stdarg_p (type);
5785   if (TARGET_RTD && !is_stdarg)
5786     return IX86_CALLCVT_STDCALL | ret;
5787 
5788   if (ret != 0
5789       || is_stdarg
5790       || TREE_CODE (type) != METHOD_TYPE
5791       || ix86_function_type_abi (type) != MS_ABI)
5792     return IX86_CALLCVT_CDECL | ret;
5793 
5794   return IX86_CALLCVT_THISCALL;
5795 }
5796 
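/* A sketch of how the bits above combine (hypothetical declaration):

     int __attribute__ ((stdcall, regparm (2))) g (int, int);

   yields IX86_CALLCVT_STDCALL | IX86_CALLCVT_REGPARM, while a stdarg
   function without such attributes always decays to IX86_CALLCVT_CDECL,
   even under -mrtd.  */
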
5797 /* Return 0 if the attributes for two types are incompatible, 1 if they
5798    are compatible, and 2 if they are nearly compatible (which causes a
5799    warning to be generated).  */
5800 
5801 static int
5802 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5803 {
5804   unsigned int ccvt1, ccvt2;
5805 
5806   if (TREE_CODE (type1) != FUNCTION_TYPE
5807       && TREE_CODE (type1) != METHOD_TYPE)
5808     return 1;
5809 
5810   ccvt1 = ix86_get_callcvt (type1);
5811   ccvt2 = ix86_get_callcvt (type2);
5812   if (ccvt1 != ccvt2)
5813     return 0;
5814   if (ix86_function_regparm (type1, NULL)
5815       != ix86_function_regparm (type2, NULL))
5816     return 0;
5817 
5818   return 1;
5819 }
5820 
5821 /* Return the regparm value for a function with the indicated TYPE and DECL.
5822    DECL may be NULL when calling the function indirectly
5823    or considering a libcall.  */
5824 
5825 static int
5826 ix86_function_regparm (const_tree type, const_tree decl)
5827 {
5828   tree attr;
5829   int regparm;
5830   unsigned int ccvt;
5831 
5832   if (TARGET_64BIT)
5833     return (ix86_function_type_abi (type) == SYSV_ABI
5834 	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5835   ccvt = ix86_get_callcvt (type);
5836   regparm = ix86_regparm;
5837 
5838   if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5839     {
5840       attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5841       if (attr)
5842 	{
5843 	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5844 	  return regparm;
5845 	}
5846     }
5847   else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5848     return 2;
5849   else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5850     return 1;
5851 
5852   /* Use register calling convention for local functions when possible.  */
5853   if (decl
5854       && TREE_CODE (decl) == FUNCTION_DECL)
5855     {
5856       cgraph_node *target = cgraph_node::get (decl);
5857       if (target)
5858 	target = target->function_symbol ();
5859 
5860       /* Caller and callee must agree on the calling convention, so
5861 	 checking just the current function's optimize setting here would
5862 	 mean that with __attribute__((optimize (...))) the caller could use
5863 	 the regparm convention and the callee not, or vice versa.  Instead
5864 	 look at whether the callee itself is optimized.  */
5865       if (target && opt_for_fn (target->decl, optimize)
5866 	  && !(profile_flag && !flag_fentry))
5867 	{
5868 	  cgraph_local_info *i = &target->local;
5869 	  if (i && i->local && i->can_change_signature)
5870 	    {
5871 	      int local_regparm, globals = 0, regno;
5872 
5873 	      /* Make sure no regparm register is taken by a
5874 		 fixed register variable.  */
5875 	      for (local_regparm = 0; local_regparm < REGPARM_MAX;
5876 		   local_regparm++)
5877 		if (fixed_regs[local_regparm])
5878 		  break;
5879 
5880 	      /* We don't want to use regparm(3) for nested functions as
5881 		 these use a static chain pointer in the third argument.  */
5882 	      if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
5883 		local_regparm = 2;
5884 
5885 	      /* Save a register for the split stack.  */
5886 	      if (local_regparm == 3 && flag_split_stack)
5887 		local_regparm = 2;
5888 
5889 	      /* Each fixed register usage increases register pressure,
5890 		 so fewer registers should be used for argument passing.
5891 		 This functionality can be overridden by an explicit
5892 		 regparm value.  */
5893 	      for (regno = AX_REG; regno <= DI_REG; regno++)
5894 		if (fixed_regs[regno])
5895 		  globals++;
5896 
5897 	      local_regparm
5898 		= globals < local_regparm ? local_regparm - globals : 0;
5899 
5900 	      if (local_regparm > regparm)
5901 		regparm = local_regparm;
5902 	    }
5903 	}
5904     }
5905 
5906   return regparm;
5907 }
5908 
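/* Two illustrative results of the function above (sketch only):

     int __attribute__ ((regparm (3))) f (int a, int b, int c);

   yields 3 unconditionally, while a plain static function seen only in
   its own unit may be promoted to use up to REGPARM_MAX argument
   registers when the local-function heuristics above apply (no fixed
   registers taken, no static chain, no split stack conflict).  */
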
5909 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5910    DFmode (2) arguments in SSE registers for a function with the
5911    indicated TYPE and DECL.  DECL may be NULL when calling the function
5912    indirectly or considering a libcall.  Return -1 if any FP parameter
5913    should be rejected by an error.  This is used in situations where we
5914    imply the SSE calling convention but the function is called from
5915    another function with SSE disabled.  Otherwise return 0.  */
5916 
5917 static int
5918 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5919 {
5920   gcc_assert (!TARGET_64BIT);
5921 
5922   /* Use SSE registers to pass SFmode and DFmode arguments if requested
5923      by the sseregparm attribute.  */
5924   if (TARGET_SSEREGPARM
5925       || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5926     {
5927       if (!TARGET_SSE)
5928 	{
5929 	  if (warn)
5930 	    {
5931 	      if (decl)
5932 		error ("calling %qD with attribute sseregparm without "
5933 		       "SSE/SSE2 enabled", decl);
5934 	      else
5935 		error ("calling %qT with attribute sseregparm without "
5936 		       "SSE/SSE2 enabled", type);
5937 	    }
5938 	  return 0;
5939 	}
5940 
5941       return 2;
5942     }
5943 
5944   if (!decl)
5945     return 0;
5946 
5947   cgraph_node *target = cgraph_node::get (decl);
5948   if (target)
5949     target = target->function_symbol ();
5950 
5951   /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5952      (and DFmode for SSE2) arguments in SSE registers.  */
5953   if (target
5954       /* TARGET_SSE_MATH */
5955       && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
5956       && opt_for_fn (target->decl, optimize)
5957       && !(profile_flag && !flag_fentry))
5958     {
5959       cgraph_local_info *i = &target->local;
5960       if (i && i->local && i->can_change_signature)
5961 	{
5962 	  /* Refuse to produce wrong code when a local function with SSE
5963 	     enabled is called from an SSE-disabled function.
5964 	     FIXME: We need a way to detect these cases cross-ltrans partition
5965 	     and avoid using SSE calling conventions on local functions called
5966 	     from function with SSE disabled.  For now at least delay the
5967 	     warning until we know we are going to produce wrong code.
5968 	     See PR66047.  */
5969 	  if (!TARGET_SSE && warn)
5970 	    return -1;
5971 	  return TARGET_SSE2_P (target_opts_for_fn (target->decl)
5972 				->x_ix86_isa_flags) ? 2 : 1;
5973 	}
5974     }
5975 
5976   return 0;
5977 }
5978 
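/* Hypothetical use of the attribute checked above:

     double __attribute__ ((sseregparm)) hyp (double x, double y);

   asks for X and Y in SSE registers under -m32 (first in %xmm0), and
   is rejected with an error when SSE is not enabled.  */
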
5979 /* Return true if EAX is live at the start of the function.  Used by
5980    ix86_expand_prologue to determine if we need special help before
5981    calling allocate_stack_worker.  */
5982 
5983 static bool
5984 ix86_eax_live_at_start_p (void)
5985 {
5986   /* Cheat.  Don't bother working forward from ix86_function_regparm
5987      to the function type to whether an actual argument is located in
5988      eax.  Instead just look at cfg info, which is still close enough
5989      to correct at this point.  This gives false positives for broken
5990      functions that might use uninitialized data that happens to be
5991      allocated in eax, but who cares?  */
5992   return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5993 }
5994 
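/* Return true if the hidden pointer to an aggregate return value should
   be left on the stack for the caller to pop rather than popped by the
   callee; see ix86_return_pops_args below.  */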
5995 static bool
5996 ix86_keep_aggregate_return_pointer (tree fntype)
5997 {
5998   tree attr;
5999 
6000   if (!TARGET_64BIT)
6001     {
6002       attr = lookup_attribute ("callee_pop_aggregate_return",
6003 			       TYPE_ATTRIBUTES (fntype));
6004       if (attr)
6005 	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
6006 
6007       /* For 32-bit MS-ABI the default is to keep aggregate
6008          return pointer.  */
6009       if (ix86_function_type_abi (fntype) == MS_ABI)
6010 	return true;
6011     }
6012   return KEEP_AGGREGATE_RETURN_POINTER != 0;
6013 }
6014 
6015 /* Value is the number of bytes of arguments automatically
6016    popped when returning from a subroutine call.
6017    FUNDECL is the declaration node of the function (as a tree),
6018    FUNTYPE is the data type of the function (as a tree),
6019    or for a library call it is an identifier node for the subroutine name.
6020    SIZE is the number of bytes of arguments passed on the stack.
6021 
6022    On the 80386, the RTD insn may be used to pop them if the number
6023      of args is fixed, but if the number is variable then the caller
6024      must pop them all.  RTD can't be used for library calls now
6025      because the library is compiled with the Unix compiler.
6026    Use of RTD is a selectable option, since it is incompatible with
6027    standard Unix calling sequences.  If the option is not selected,
6028    the caller must always pop the args.
6029 
6030    The attribute stdcall is equivalent to RTD on a per module basis.  */
6031 
6032 static int
6033 ix86_return_pops_args (tree fundecl, tree funtype, int size)
6034 {
6035   unsigned int ccvt;
6036 
6037   /* None of the 64-bit ABIs pop arguments.  */
6038   if (TARGET_64BIT)
6039     return 0;
6040 
6041   ccvt = ix86_get_callcvt (funtype);
6042 
6043   if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6044 	       | IX86_CALLCVT_THISCALL)) != 0
6045       && ! stdarg_p (funtype))
6046     return size;
6047 
6048   /* Lose any fake structure return argument if it is passed on the stack.  */
6049   if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6050       && !ix86_keep_aggregate_return_pointer (funtype))
6051     {
6052       int nregs = ix86_function_regparm (funtype, fundecl);
6053       if (nregs == 0)
6054 	return GET_MODE_SIZE (Pmode);
6055     }
6056 
6057   return 0;
6058 }
6059 
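/* For example (illustrative, not from this file), a 32-bit stdcall
   function taking two ints reports 8 here, so its epilogue pops the
   arguments itself:

     ret	$8

   whereas the default cdecl convention reports 0 and leaves them for
   the caller.  */
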
6060 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  */
6061 
6062 static bool
6063 ix86_legitimate_combined_insn (rtx_insn *insn)
6064 {
6065   /* Check operand constraints in case hard registers were propagated
6066      into insn pattern.  This check prevents combine pass from
6067      generating insn patterns with invalid hard register operands.
6068      These invalid insns can eventually confuse reload to error out
6069      with a spill failure.  See also PRs 46829 and 46843.  */
6070   if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
6071     {
6072       int i;
6073 
6074       extract_insn (insn);
6075       preprocess_constraints (insn);
6076 
6077       int n_operands = recog_data.n_operands;
6078       int n_alternatives = recog_data.n_alternatives;
6079       for (i = 0; i < n_operands; i++)
6080 	{
6081 	  rtx op = recog_data.operand[i];
6082 	  machine_mode mode = GET_MODE (op);
6083 	  const operand_alternative *op_alt;
6084 	  int offset = 0;
6085 	  bool win;
6086 	  int j;
6087 
6088 	  /* For pre-AVX disallow unaligned loads/stores where the
6089 	     instructions don't support them.  */
6090 	  if (!TARGET_AVX
6091 	      && VECTOR_MODE_P (GET_MODE (op))
6092 	      && misaligned_operand (op, GET_MODE (op)))
6093 	    {
6094 	      int min_align = get_attr_ssememalign (insn);
6095 	      if (min_align == 0)
6096 		return false;
6097 	    }
6098 
6099 	  /* A unary operator may be accepted by the predicate, but it
6100 	     is irrelevant for matching constraints.  */
6101 	  if (UNARY_P (op))
6102 	    op = XEXP (op, 0);
6103 
6104 	  if (GET_CODE (op) == SUBREG)
6105 	    {
6106 	      if (REG_P (SUBREG_REG (op))
6107 		  && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6108 		offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6109 					      GET_MODE (SUBREG_REG (op)),
6110 					      SUBREG_BYTE (op),
6111 					      GET_MODE (op));
6112 	      op = SUBREG_REG (op);
6113 	    }
6114 
6115 	  if (!(REG_P (op) && HARD_REGISTER_P (op)))
6116 	    continue;
6117 
6118 	  op_alt = recog_op_alt;
6119 
6120 	  /* Operand has no constraints, anything is OK.  */
6121  	  win = !n_alternatives;
6122 
6123 	  alternative_mask preferred = get_preferred_alternatives (insn);
6124 	  for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6125 	    {
6126 	      if (!TEST_BIT (preferred, j))
6127 		continue;
6128 	      if (op_alt[i].anything_ok
6129 		  || (op_alt[i].matches != -1
6130 		      && operands_match_p
6131 			  (recog_data.operand[i],
6132 			   recog_data.operand[op_alt[i].matches]))
6133 		  || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6134 		{
6135 		  win = true;
6136 		  break;
6137 		}
6138 	    }
6139 
6140 	  if (!win)
6141 	    return false;
6142 	}
6143     }
6144 
6145   return true;
6146 }
6147 
6148 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
6149 
6150 static unsigned HOST_WIDE_INT
6151 ix86_asan_shadow_offset (void)
6152 {
6153   return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6154 				     : HOST_WIDE_INT_C (0x7fff8000))
6155 		     : (HOST_WIDE_INT_1 << 29);
6156 }
6157 
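/* The offset above feeds the standard ASan shadow mapping; a sketch of
   the address computation the instrumentation performs:

     shadow = (addr >> 3) + ix86_asan_shadow_offset ();

   i.e. offset 0x7fff8000 for LP64 ELF targets and 1 << 29 for 32-bit
   ones.  */
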
6158 /* Argument support functions.  */
6159 
6160 /* Return true when register may be used to pass function parameters.  */
6161 bool
6162 ix86_function_arg_regno_p (int regno)
6163 {
6164   int i;
6165   enum calling_abi call_abi;
6166   const int *parm_regs;
6167 
6168   if (TARGET_MPX && BND_REGNO_P (regno))
6169     return true;
6170 
6171   if (!TARGET_64BIT)
6172     {
6173       if (TARGET_MACHO)
6174         return (regno < REGPARM_MAX
6175                 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6176       else
6177         return (regno < REGPARM_MAX
6178 	        || (TARGET_MMX && MMX_REGNO_P (regno)
6179 	  	    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6180 	        || (TARGET_SSE && SSE_REGNO_P (regno)
6181 		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6182     }
6183 
6184   if (TARGET_SSE && SSE_REGNO_P (regno)
6185       && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6186     return true;
6187 
6188   /* TODO: The function should depend on current function ABI but
6189      builtins.c would need updating then. Therefore we use the
6190      default ABI.  */
6191   call_abi = ix86_cfun_abi ();
6192 
6193   /* RAX is used as hidden argument to va_arg functions.  */
6194   if (call_abi == SYSV_ABI && regno == AX_REG)
6195     return true;
6196 
6197   if (call_abi == MS_ABI)
6198     parm_regs = x86_64_ms_abi_int_parameter_registers;
6199   else
6200     parm_regs = x86_64_int_parameter_registers;
6201 
6202   for (i = 0; i < (call_abi == MS_ABI
6203 		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6204     if (regno == parm_regs[i])
6205       return true;
6206   return false;
6207 }
6208 
6209 /* Return true if we do not know how to pass TYPE solely in registers.  */
6210 
6211 static bool
6212 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6213 {
6214   if (must_pass_in_stack_var_size_or_pad (mode, type))
6215     return true;
6216 
6217   /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
6218      The layout_type routine is crafty and tries to trick us into passing
6219      currently unsupported vector types on the stack by using TImode.  */
6220   return (!TARGET_64BIT && mode == TImode
6221 	  && type && TREE_CODE (type) != VECTOR_TYPE);
6222 }
6223 
6224 /* Return the size, in bytes, of the area reserved for arguments passed
6225    in registers for the function represented by FNDECL, depending on the
6226    ABI used.  */
6227 int
6228 ix86_reg_parm_stack_space (const_tree fndecl)
6229 {
6230   enum calling_abi call_abi = SYSV_ABI;
6231   if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6232     call_abi = ix86_function_abi (fndecl);
6233   else
6234     call_abi = ix86_function_type_abi (fndecl);
6235   if (TARGET_64BIT && call_abi == MS_ABI)
6236     return 32;
6237   return 0;
6238 }
6239 
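/* The 32 bytes reported for the 64-bit MS ABI are the caller-reserved
   "shadow space"; a call site therefore looks roughly like this sketch
   (assuming no other outgoing stack arguments):

     subq	$32, %rsp
     call	callee
     addq	$32, %rsp  */
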
6240 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
6241    calling ABI used.  */
6242 enum calling_abi
6243 ix86_function_type_abi (const_tree fntype)
6244 {
6245   if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6246     {
6247       enum calling_abi abi = ix86_abi;
6248       if (abi == SYSV_ABI)
6249 	{
6250 	  if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6251 	    {
6252 	      if (TARGET_X32)
6253 		{
6254 		  static bool warned = false;
6255 		  if (!warned)
6256 		    {
6257 		      error ("X32 does not support ms_abi attribute");
6258 		      warned = true;
6259 		    }
6260 		}
6261 	      abi = MS_ABI;
6262 	    }
6263 	}
6264       else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6265 	abi = SYSV_ABI;
6266       return abi;
6267     }
6268   return ix86_abi;
6269 }
6270 
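/* Hypothetical use of the attributes inspected above: on a SYSV-default
   target,

     int __attribute__ ((ms_abi)) import_fn (int, int);

   makes calls to import_fn follow the Microsoft x64 convention
   (integer arguments in rcx/rdx/r8/r9), and sysv_abi works the other
   way around on an MS-default target.  */
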
6271 /* We add this as a workaround in order to use the libc_has_function
6272    hook in i386.md.  */
6273 bool
6274 ix86_libc_has_function (enum function_class fn_class)
6275 {
6276   return targetm.libc_has_function (fn_class);
6277 }
6278 
6279 static bool
6280 ix86_function_ms_hook_prologue (const_tree fn)
6281 {
6282   if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6283     {
6284       if (decl_function_context (fn) != NULL_TREE)
6285 	error_at (DECL_SOURCE_LOCATION (fn),
6286 		  "ms_hook_prologue is not compatible with nested function");
6287       else
6288         return true;
6289     }
6290   return false;
6291 }
6292 
6293 static enum calling_abi
6294 ix86_function_abi (const_tree fndecl)
6295 {
6296   if (! fndecl)
6297     return ix86_abi;
6298   return ix86_function_type_abi (TREE_TYPE (fndecl));
6299 }
6300 
6301 /* Return SYSV_ABI or MS_ABI, depending on CFUN, specifying the
6302    calling ABI used.  */
6303 enum calling_abi
6304 ix86_cfun_abi (void)
6305 {
6306   if (! cfun)
6307     return ix86_abi;
6308   return cfun->machine->call_abi;
6309 }
6310 
6311 /* Write the extra assembler code needed to declare a function properly.  */
6312 
6313 void
6314 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6315 				tree decl)
6316 {
6317   bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6318 
6319   if (is_ms_hook)
6320     {
6321       int i, filler_count = (TARGET_64BIT ? 32 : 16);
6322       unsigned int filler_cc = 0xcccccccc;
6323 
6324       for (i = 0; i < filler_count; i += 4)
6325         fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6326     }
6327 
6328 #ifdef SUBTARGET_ASM_UNWIND_INIT
6329   SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6330 #endif
6331 
6332   ASM_OUTPUT_LABEL (asm_out_file, fname);
6333 
6334   /* Output magic byte marker, if hot-patch attribute is set.  */
6335   if (is_ms_hook)
6336     {
6337       if (TARGET_64BIT)
6338 	{
6339 	  /* leaq [%rsp + 0], %rsp  */
6340 	  asm_fprintf (asm_out_file, ASM_BYTE
6341 		       "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6342 	}
6343       else
6344 	{
6345           /* movl.s %edi, %edi
6346 	     push   %ebp
6347 	     movl.s %esp, %ebp */
6348 	  asm_fprintf (asm_out_file, ASM_BYTE
6349 		       "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6350 	}
6351     }
6352 }
6353 
6354 /* regclass.c  */
6355 extern void init_regs (void);
6356 
6357 /* Implementation of the call ABI switching target hook.  The call
6358    register sets specific to FNDECL are set up.  See also
6359    ix86_conditional_register_usage for more details.  */
6360 void
6361 ix86_call_abi_override (const_tree fndecl)
6362 {
6363   if (fndecl == NULL_TREE)
6364     cfun->machine->call_abi = ix86_abi;
6365   else
6366     cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6367 }
6368 
6369 /* Return true if a pseudo register should be created and used to hold
6370    the GOT address for PIC code.  */
6371 bool
6372 ix86_use_pseudo_pic_reg (void)
6373 {
6374   if ((TARGET_64BIT
6375        && (ix86_cmodel == CM_SMALL_PIC
6376 	   || TARGET_PECOFF))
6377       || !flag_pic)
6378     return false;
6379   return true;
6380 }
6381 
6382 /* Initialize large model PIC register.  */
6383 
6384 static void
6385 ix86_init_large_pic_reg (unsigned int tmp_regno)
6386 {
6387   rtx_code_label *label;
6388   rtx tmp_reg;
6389 
6390   gcc_assert (Pmode == DImode);
6391   label = gen_label_rtx ();
6392   emit_label (label);
6393   LABEL_PRESERVE_P (label) = 1;
6394   tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6395   gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6396   emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6397 				label));
6398   emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6399   emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6400 			    pic_offset_table_rtx, tmp_reg));
6401 }
6402 
6403 /* Create and initialize PIC register if required.  */
6404 static void
6405 ix86_init_pic_reg (void)
6406 {
6407   edge entry_edge;
6408   rtx_insn *seq;
6409 
6410   if (!ix86_use_pseudo_pic_reg ())
6411     return;
6412 
6413   start_sequence ();
6414 
6415   if (TARGET_64BIT)
6416     {
6417       if (ix86_cmodel == CM_LARGE_PIC)
6418 	ix86_init_large_pic_reg (R11_REG);
6419       else
6420 	emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6421     }
6422   else
6423     {
6424       /* If there is a future mcount call in the function, it is more
6425 	  profitable to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
6426       rtx reg = crtl->profile
6427 		? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6428 		: pic_offset_table_rtx;
6429       rtx insn = emit_insn (gen_set_got (reg));
6430       RTX_FRAME_RELATED_P (insn) = 1;
6431       if (crtl->profile)
6432         emit_move_insn (pic_offset_table_rtx, reg);
6433       add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6434     }
6435 
6436   seq = get_insns ();
6437   end_sequence ();
6438 
6439   entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6440   insert_insn_on_edge (seq, entry_edge);
6441   commit_one_edge_insertion (entry_edge);
6442 }
6443 
6444 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6445    for a call to a function whose data type is FNTYPE.
6446    For a library call, FNTYPE is 0.  */
6447 
6448 void
6449 init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
6450 		      tree fntype,	/* tree ptr for function decl */
6451 		      rtx libname,	/* SYMBOL_REF of library name or 0 */
6452 		      tree fndecl,
6453 		      int caller)
6454 {
6455   struct cgraph_local_info *i = NULL;
6456   struct cgraph_node *target = NULL;
6457 
6458   memset (cum, 0, sizeof (*cum));
6459 
6460   if (fndecl)
6461     {
6462       target = cgraph_node::get (fndecl);
6463       if (target)
6464 	{
6465 	  target = target->function_symbol ();
6466 	  i = cgraph_node::local_info (target->decl);
6467 	  cum->call_abi = ix86_function_abi (target->decl);
6468 	}
6469       else
6470 	cum->call_abi = ix86_function_abi (fndecl);
6471     }
6472   else
6473     cum->call_abi = ix86_function_type_abi (fntype);
6474 
6475   cum->caller = caller;
6476 
6477   /* Set up the number of registers to use for passing arguments.  */
6478   cum->nregs = ix86_regparm;
6479   if (TARGET_64BIT)
6480     {
6481       cum->nregs = (cum->call_abi == SYSV_ABI
6482                    ? X86_64_REGPARM_MAX
6483                    : X86_64_MS_REGPARM_MAX);
6484     }
6485   if (TARGET_SSE)
6486     {
6487       cum->sse_nregs = SSE_REGPARM_MAX;
6488       if (TARGET_64BIT)
6489         {
6490           cum->sse_nregs = (cum->call_abi == SYSV_ABI
6491                            ? X86_64_SSE_REGPARM_MAX
6492                            : X86_64_MS_SSE_REGPARM_MAX);
6493         }
6494     }
6495   if (TARGET_MMX)
6496     cum->mmx_nregs = MMX_REGPARM_MAX;
6497   cum->warn_avx512f = true;
6498   cum->warn_avx = true;
6499   cum->warn_sse = true;
6500   cum->warn_mmx = true;
6501 
6502   /* Because types might mismatch between caller and callee, we need to
6503      use the actual type of the function for local calls.
6504      FIXME: cgraph_analyze can be told to actually record if a function uses
6505      va_start, so for local functions maybe_vaarg can be made aggressive,
6506      helping K&R code.
6507      FIXME: once the type system is fixed, we won't need this code anymore.  */
6508   if (i && i->local && i->can_change_signature)
6509     fntype = TREE_TYPE (target->decl);
6510   cum->stdarg = stdarg_p (fntype);
6511   cum->maybe_vaarg = (fntype
6512 		      ? (!prototype_p (fntype) || stdarg_p (fntype))
6513 		      : !libname);
6514 
6515   cum->bnd_regno = FIRST_BND_REG;
6516   cum->bnds_in_bt = 0;
6517   cum->force_bnd_pass = 0;
6518   cum->decl = fndecl;
6519 
6520   if (!TARGET_64BIT)
6521     {
6522       /* If there are variable arguments, then we won't pass anything
6523          in registers in 32-bit mode. */
6524       if (stdarg_p (fntype))
6525 	{
6526 	  cum->nregs = 0;
6527 	  cum->sse_nregs = 0;
6528 	  cum->mmx_nregs = 0;
6529 	  cum->warn_avx512f = false;
6530 	  cum->warn_avx = false;
6531 	  cum->warn_sse = false;
6532 	  cum->warn_mmx = false;
6533 	  return;
6534 	}
6535 
6536       /* Use ecx and edx registers if function has fastcall attribute,
6537 	 else look for regparm information.  */
6538       if (fntype)
6539 	{
6540 	  unsigned int ccvt = ix86_get_callcvt (fntype);
6541 	  if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6542 	    {
6543 	      cum->nregs = 1;
6544 	      cum->fastcall = 1; /* Same first register as in fastcall.  */
6545 	    }
6546 	  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6547 	    {
6548 	      cum->nregs = 2;
6549 	      cum->fastcall = 1;
6550 	    }
6551 	  else
6552 	    cum->nregs = ix86_function_regparm (fntype, fndecl);
6553 	}
6554 
6555       /* Set up the number of SSE registers used for passing SFmode
6556 	 and DFmode arguments.  Warn for mismatching ABI.  */
6557       cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6558     }
6559 }
6560 
6561 /* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
6562    But in the case of vector types, it is some vector mode.
6563 
6564    When we have only some of our vector isa extensions enabled, then there
6565    are some modes for which vector_mode_supported_p is false.  For these
6566    modes, the generic vector support in gcc will choose some non-vector mode
6567    in order to implement the type.  By computing the natural mode, we'll
6568    select the proper ABI location for the operand and not depend on whatever
6569    the middle-end decides to do with these vector types.
6570 
6571    The middle-end can't deal with vector types > 16 bytes.  In this
6572    case, we return the original mode and warn about the ABI change if
6573    CUM isn't NULL.
6574 
6575    If IN_RETURN is true, warn about the ABI change if the vector mode
6576    isn't available for the function return value.  */
6577 
6578 static machine_mode
6579 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6580 		   bool in_return)
6581 {
6582   machine_mode mode = TYPE_MODE (type);
6583 
6584   if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6585     {
6586       HOST_WIDE_INT size = int_size_in_bytes (type);
6587       if ((size == 8 || size == 16 || size == 32 || size == 64)
6588 	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
6589 	  && TYPE_VECTOR_SUBPARTS (type) > 1)
6590 	{
6591 	  machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6592 
6593 	  /* There are no XFmode vector modes.  */
6594 	  if (innermode == XFmode)
6595 	    return mode;
6596 
6597 	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6598 	    mode = MIN_MODE_VECTOR_FLOAT;
6599 	  else
6600 	    mode = MIN_MODE_VECTOR_INT;
6601 
6602 	  /* Get the mode which has this inner mode and number of units.  */
6603 	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6604 	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6605 		&& GET_MODE_INNER (mode) == innermode)
6606 	      {
6607 		if (size == 64 && !TARGET_AVX512F)
6608 		  {
6609 		    static bool warnedavx512f;
6610 		    static bool warnedavx512f_ret;
6611 
6612 		    if (cum && cum->warn_avx512f && !warnedavx512f)
6613 		      {
6614 			if (warning (OPT_Wpsabi, "AVX512F vector argument "
6615 				     "without AVX512F enabled changes the ABI"))
6616 			  warnedavx512f = true;
6617 		      }
6618 		    else if (in_return && !warnedavx512f_ret)
6619 		      {
6620 			if (warning (OPT_Wpsabi, "AVX512F vector return "
6621 				     "without AVX512F enabled changes the ABI"))
6622 			  warnedavx512f_ret = true;
6623 		      }
6624 
6625 		    return TYPE_MODE (type);
6626 		  }
6627 		else if (size == 32 && !TARGET_AVX)
6628 		  {
6629 		    static bool warnedavx;
6630 		    static bool warnedavx_ret;
6631 
6632 		    if (cum && cum->warn_avx && !warnedavx)
6633 		      {
6634 			if (warning (OPT_Wpsabi, "AVX vector argument "
6635 				     "without AVX enabled changes the ABI"))
6636 			  warnedavx = true;
6637 		      }
6638 		    else if (in_return && !warnedavx_ret)
6639 		      {
6640 			if (warning (OPT_Wpsabi, "AVX vector return "
6641 				     "without AVX enabled changes the ABI"))
6642 			  warnedavx_ret = true;
6643 		      }
6644 
6645 		    return TYPE_MODE (type);
6646 		  }
6647 		else if (((size == 8 && TARGET_64BIT) || size == 16)
6648 			 && !TARGET_SSE)
6649 		  {
6650 		    static bool warnedsse;
6651 		    static bool warnedsse_ret;
6652 
6653 		    if (cum && cum->warn_sse && !warnedsse)
6654 		      {
6655 			if (warning (OPT_Wpsabi, "SSE vector argument "
6656 				     "without SSE enabled changes the ABI"))
6657 			  warnedsse = true;
6658 		      }
6659 		    else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6660 		      {
6661 			if (warning (OPT_Wpsabi, "SSE vector return "
6662 				     "without SSE enabled changes the ABI"))
6663 			  warnedsse_ret = true;
6664 		      }
6665 		  }
6666 		else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6667 		  {
6668 		    static bool warnedmmx;
6669 		    static bool warnedmmx_ret;
6670 
6671 		    if (cum && cum->warn_mmx && !warnedmmx)
6672 		      {
6673 			if (warning (OPT_Wpsabi, "MMX vector argument "
6674 				     "without MMX enabled changes the ABI"))
6675 			  warnedmmx = true;
6676 		      }
6677 		    else if (in_return && !warnedmmx_ret)
6678 		      {
6679 			if (warning (OPT_Wpsabi, "MMX vector return "
6680 				     "without MMX enabled changes the ABI"))
6681 			  warnedmmx_ret = true;
6682 		      }
6683 		  }
6684 		return mode;
6685 	      }
6686 
6687 	  gcc_unreachable ();
6688 	}
6689     }
6690 
6691   return mode;
6692 }
6693 
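/* For example (a sketch): with only SSE2 enabled, an argument of type
   `float __attribute__ ((vector_size (32)))' has no usable 32-byte
   vector mode, so the function above keeps the type's original mode
   and emits the -Wpsabi "AVX vector argument without AVX enabled
   changes the ABI" warning once per compilation.  */
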
6694 /* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
6695    this may not agree with the mode that the type system has chosen for the
6696    register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
6697    go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */
6698 
6699 static rtx
6700 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6701 		     unsigned int regno)
6702 {
6703   rtx tmp;
6704 
6705   if (orig_mode != BLKmode)
6706     tmp = gen_rtx_REG (orig_mode, regno);
6707   else
6708     {
6709       tmp = gen_rtx_REG (mode, regno);
6710       tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6711       tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6712     }
6713 
6714   return tmp;
6715 }
6716 
6717 /* x86-64 register passing implementation.  See the x86-64 ABI for details.
6718    The goal of this code is to classify each 8 bytes of an incoming argument
6719    by register class and assign registers accordingly.  */
6720 
6721 /* Return the union class of CLASS1 and CLASS2.
6722    See the x86-64 PS ABI for details.  */
6723 
6724 static enum x86_64_reg_class
6725 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6726 {
6727   /* Rule #1: If both classes are equal, this is the resulting class.  */
6728   if (class1 == class2)
6729     return class1;
6730 
6731   /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6732      the other class.  */
6733   if (class1 == X86_64_NO_CLASS)
6734     return class2;
6735   if (class2 == X86_64_NO_CLASS)
6736     return class1;
6737 
6738   /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
6739   if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6740     return X86_64_MEMORY_CLASS;
6741 
6742   /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
6743   if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6744       || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6745     return X86_64_INTEGERSI_CLASS;
6746   if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6747       || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6748     return X86_64_INTEGER_CLASS;
6749 
6750   /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6751      MEMORY is used.  */
6752   if (class1 == X86_64_X87_CLASS
6753       || class1 == X86_64_X87UP_CLASS
6754       || class1 == X86_64_COMPLEX_X87_CLASS
6755       || class2 == X86_64_X87_CLASS
6756       || class2 == X86_64_X87UP_CLASS
6757       || class2 == X86_64_COMPLEX_X87_CLASS)
6758     return X86_64_MEMORY_CLASS;
6759 
6760   /* Rule #6: Otherwise class SSE is used.  */
6761   return X86_64_SSE_CLASS;
6762 }
6763 
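/* A worked instance of the rules above (illustrative):

     struct s { int i; float f; };	-- a single eightbyte

   classifies `i' as INTEGERSI and `f' (at a 32-bit offset) as SSE;
   rule #4 merges them to INTEGER, so the whole struct is passed in one
   general register.  */
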
6764 /* Classify the argument of type TYPE and mode MODE.
6765    CLASSES will be filled by the register class used to pass each word
6766    of the operand.  The number of words is returned.  In case the parameter
6767    should be passed in memory, 0 is returned.  As a special case for zero
6768    sized containers, classes[0] will be NO_CLASS and 1 is returned.
6769 
6770    BIT_OFFSET is used internally for handling records and specifies the
6771    offset in bits modulo 512 to avoid overflow cases.
6772 
6773    See the x86-64 PS ABI for details.  */
6775 
6776 static int
6777 classify_argument (machine_mode mode, const_tree type,
6778 		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6779 {
6780   HOST_WIDE_INT bytes =
6781     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6782   int words
6783     = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6784 
6785   /* Variable sized entities are always passed/returned in memory.  */
6786   if (bytes < 0)
6787     return 0;
6788 
6789   if (mode != VOIDmode
6790       && targetm.calls.must_pass_in_stack (mode, type))
6791     return 0;
6792 
6793   if (type && AGGREGATE_TYPE_P (type))
6794     {
6795       int i;
6796       tree field;
6797       enum x86_64_reg_class subclasses[MAX_CLASSES];
6798 
6799       /* On x86-64 we pass structures larger than 64 bytes on the stack.  */
6800       if (bytes > 64)
6801 	return 0;
6802 
6803       for (i = 0; i < words; i++)
6804 	classes[i] = X86_64_NO_CLASS;
6805 
6806       /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
6807 	 signal the memory class, so handle this as a special case.  */
6808       if (!words)
6809 	{
6810 	  classes[0] = X86_64_NO_CLASS;
6811 	  return 1;
6812 	}
6813 
6814       /* Classify each field of record and merge classes.  */
6815       switch (TREE_CODE (type))
6816 	{
6817 	case RECORD_TYPE:
6818 	  /* And now merge the fields of structure.  */
6819 	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6820 	    {
6821 	      if (TREE_CODE (field) == FIELD_DECL)
6822 		{
6823 		  int num;
6824 
6825 		  if (TREE_TYPE (field) == error_mark_node)
6826 		    continue;
6827 
6828 		  /* Bitfields are always classified as integer.  Handle them
6829 		     early, since later code would consider them to be
6830 		     misaligned integers.  */
6831 		  if (DECL_BIT_FIELD (field))
6832 		    {
6833 		      for (i = (int_bit_position (field)
6834 				+ (bit_offset % 64)) / 8 / 8;
6835 			   i < ((int_bit_position (field) + (bit_offset % 64))
6836 			        + tree_to_shwi (DECL_SIZE (field))
6837 				+ 63) / 8 / 8; i++)
6838 			classes[i] =
6839 			  merge_classes (X86_64_INTEGER_CLASS,
6840 					 classes[i]);
6841 		    }
6842 		  else
6843 		    {
6844 		      int pos;
6845 
6846 		      type = TREE_TYPE (field);
6847 
6848 		      /* Flexible array member is ignored.  */
6849 		      if (TYPE_MODE (type) == BLKmode
6850 			  && TREE_CODE (type) == ARRAY_TYPE
6851 			  && TYPE_SIZE (type) == NULL_TREE
6852 			  && TYPE_DOMAIN (type) != NULL_TREE
6853 			  && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6854 			      == NULL_TREE))
6855 			{
6856 			  static bool warned;
6857 
6858 			  if (!warned && warn_psabi)
6859 			    {
6860 			      warned = true;
6861 			      inform (input_location,
6862 				      "the ABI of passing struct with"
6863 				      " a flexible array member has"
6864 				      " changed in GCC 4.4");
6865 			    }
6866 			  continue;
6867 			}
6868 		      num = classify_argument (TYPE_MODE (type), type,
6869 					       subclasses,
6870 					       (int_bit_position (field)
6871 						+ bit_offset) % 512);
6872 		      if (!num)
6873 			return 0;
6874 		      pos = (int_bit_position (field)
6875 			     + (bit_offset % 64)) / 8 / 8;
6876 		      for (i = 0; i < num && (i + pos) < words; i++)
6877 			classes[i + pos] =
6878 			  merge_classes (subclasses[i], classes[i + pos]);
6879 		    }
6880 		}
6881 	    }
6882 	  break;
6883 
6884 	case ARRAY_TYPE:
6885 	  /* Arrays are handled as small records.  */
6886 	  {
6887 	    int num;
6888 	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6889 				     TREE_TYPE (type), subclasses, bit_offset);
6890 	    if (!num)
6891 	      return 0;
6892 
6893 	    /* The partial classes are now full classes.  */
6894 	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6895 	      subclasses[0] = X86_64_SSE_CLASS;
6896 	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
6897 		&& !((bit_offset % 64) == 0 && bytes == 4))
6898 	      subclasses[0] = X86_64_INTEGER_CLASS;
6899 
6900 	    for (i = 0; i < words; i++)
6901 	      classes[i] = subclasses[i % num];
6902 
6903 	    break;
6904 	  }
6905 	case UNION_TYPE:
6906 	case QUAL_UNION_TYPE:
6907 	  /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
6909 	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6910 	    {
6911 	      if (TREE_CODE (field) == FIELD_DECL)
6912 		{
6913 		  int num;
6914 
6915 		  if (TREE_TYPE (field) == error_mark_node)
6916 		    continue;
6917 
6918 		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6919 					   TREE_TYPE (field), subclasses,
6920 					   bit_offset);
6921 		  if (!num)
6922 		    return 0;
6923 		  for (i = 0; i < num && i < words; i++)
6924 		    classes[i] = merge_classes (subclasses[i], classes[i]);
6925 		}
6926 	    }
6927 	  break;
6928 
6929 	default:
6930 	  gcc_unreachable ();
6931 	}
6932 
6933       if (words > 2)
6934 	{
6935 	  /* When the size is > 16 bytes, if the first class isn't
6936 	     X86_64_SSE_CLASS or any of the other classes isn't
6937 	     X86_64_SSEUP_CLASS, everything should be passed in
6938 	     memory.  */
6939 	  if (classes[0] != X86_64_SSE_CLASS)
6940 	      return 0;
6941 
6942 	  for (i = 1; i < words; i++)
6943 	    if (classes[i] != X86_64_SSEUP_CLASS)
6944 	      return 0;
6945 	}
6946 
6947       /* Final merger cleanup.  */
6948       for (i = 0; i < words; i++)
6949 	{
6950 	  /* If one class is MEMORY, everything should be passed in
6951 	     memory.  */
6952 	  if (classes[i] == X86_64_MEMORY_CLASS)
6953 	    return 0;
6954 
6955 	  /* The X86_64_SSEUP_CLASS should be always preceded by
6956 	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
6957 	  if (classes[i] == X86_64_SSEUP_CLASS
6958 	      && classes[i - 1] != X86_64_SSE_CLASS
6959 	      && classes[i - 1] != X86_64_SSEUP_CLASS)
6960 	    {
6961 	      /* The first one should never be X86_64_SSEUP_CLASS.  */
6962 	      gcc_assert (i != 0);
6963 	      classes[i] = X86_64_SSE_CLASS;
6964 	    }
6965 
6966 	  /*  If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6967 	       everything should be passed in memory.  */
6968 	  if (classes[i] == X86_64_X87UP_CLASS
6969 	      && (classes[i - 1] != X86_64_X87_CLASS))
6970 	    {
6971 	      static bool warned;
6972 
6973 	      /* The first one should never be X86_64_X87UP_CLASS.  */
6974 	      gcc_assert (i != 0);
6975 	      if (!warned && warn_psabi)
6976 		{
6977 		  warned = true;
6978 		  inform (input_location,
6979 			  "the ABI of passing union with long double"
6980 			  " has changed in GCC 4.4");
6981 		}
6982 	      return 0;
6983 	    }
6984 	}
6985       return words;
6986     }
6987 
6988   /* Compute the alignment needed.  We align all types to natural boundaries
6989      with the exception of XFmode, which is aligned to 64 bits.  */
6990   if (mode != VOIDmode && mode != BLKmode)
6991     {
6992       int mode_alignment = GET_MODE_BITSIZE (mode);
6993 
6994       if (mode == XFmode)
6995 	mode_alignment = 128;
6996       else if (mode == XCmode)
6997 	mode_alignment = 256;
6998       if (COMPLEX_MODE_P (mode))
6999 	mode_alignment /= 2;
7000       /* Misaligned fields are always returned in memory.  */
7001       if (bit_offset % mode_alignment)
7002 	return 0;
7003     }
7004 
7005   /* For V1xx modes, just use the base mode.  */
7006   if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
7007       && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
7008     mode = GET_MODE_INNER (mode);
7009 
7010   /* Classification of atomic types.  */
7011   switch (mode)
7012     {
7013     case SDmode:
7014     case DDmode:
7015       classes[0] = X86_64_SSE_CLASS;
7016       return 1;
7017     case TDmode:
7018       classes[0] = X86_64_SSE_CLASS;
7019       classes[1] = X86_64_SSEUP_CLASS;
7020       return 2;
7021     case DImode:
7022     case SImode:
7023     case HImode:
7024     case QImode:
7025     case CSImode:
7026     case CHImode:
7027     case CQImode:
7028       {
7029 	int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
7030 
7031 	/* Analyze last 128 bits only.  */
7032 	size = (size - 1) & 0x7f;
7033 
7034 	if (size < 32)
7035 	  {
7036 	    classes[0] = X86_64_INTEGERSI_CLASS;
7037 	    return 1;
7038 	  }
7039 	else if (size < 64)
7040 	  {
7041 	    classes[0] = X86_64_INTEGER_CLASS;
7042 	    return 1;
7043 	  }
7044 	else if (size < 64+32)
7045 	  {
7046 	    classes[0] = X86_64_INTEGER_CLASS;
7047 	    classes[1] = X86_64_INTEGERSI_CLASS;
7048 	    return 2;
7049 	  }
7050 	else if (size < 64+64)
7051 	  {
7052 	    classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7053 	    return 2;
7054 	  }
7055 	else
7056 	  gcc_unreachable ();
7057       }
7058     case CDImode:
7059     case TImode:
7060       classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7061       return 2;
7062     case COImode:
7063     case OImode:
7064       /* OImode shouldn't be used directly.  */
7065       gcc_unreachable ();
7066     case CTImode:
7067       return 0;
7068     case SFmode:
7069       if (!(bit_offset % 64))
7070 	classes[0] = X86_64_SSESF_CLASS;
7071       else
7072 	classes[0] = X86_64_SSE_CLASS;
7073       return 1;
7074     case DFmode:
7075       classes[0] = X86_64_SSEDF_CLASS;
7076       return 1;
7077     case XFmode:
7078       classes[0] = X86_64_X87_CLASS;
7079       classes[1] = X86_64_X87UP_CLASS;
7080       return 2;
7081     case TFmode:
7082       classes[0] = X86_64_SSE_CLASS;
7083       classes[1] = X86_64_SSEUP_CLASS;
7084       return 2;
7085     case SCmode:
7086       classes[0] = X86_64_SSE_CLASS;
7087       if (!(bit_offset % 64))
7088 	return 1;
7089       else
7090 	{
7091 	  static bool warned;
7092 
7093 	  if (!warned && warn_psabi)
7094 	    {
7095 	      warned = true;
7096 	      inform (input_location,
7097 		      "the ABI of passing structure with complex float"
7098 		      " member has changed in GCC 4.4");
7099 	    }
7100 	  classes[1] = X86_64_SSESF_CLASS;
7101 	  return 2;
7102 	}
7103     case DCmode:
7104       classes[0] = X86_64_SSEDF_CLASS;
7105       classes[1] = X86_64_SSEDF_CLASS;
7106       return 2;
7107     case XCmode:
7108       classes[0] = X86_64_COMPLEX_X87_CLASS;
7109       return 1;
7110     case TCmode:
7111       /* This mode is larger than 16 bytes.  */
7112       return 0;
7113     case V8SFmode:
7114     case V8SImode:
7115     case V32QImode:
7116     case V16HImode:
7117     case V4DFmode:
7118     case V4DImode:
7119       classes[0] = X86_64_SSE_CLASS;
7120       classes[1] = X86_64_SSEUP_CLASS;
7121       classes[2] = X86_64_SSEUP_CLASS;
7122       classes[3] = X86_64_SSEUP_CLASS;
7123       return 4;
7124     case V8DFmode:
7125     case V16SFmode:
7126     case V8DImode:
7127     case V16SImode:
7128     case V32HImode:
7129     case V64QImode:
7130       classes[0] = X86_64_SSE_CLASS;
7131       classes[1] = X86_64_SSEUP_CLASS;
7132       classes[2] = X86_64_SSEUP_CLASS;
7133       classes[3] = X86_64_SSEUP_CLASS;
7134       classes[4] = X86_64_SSEUP_CLASS;
7135       classes[5] = X86_64_SSEUP_CLASS;
7136       classes[6] = X86_64_SSEUP_CLASS;
7137       classes[7] = X86_64_SSEUP_CLASS;
7138       return 8;
7139     case V4SFmode:
7140     case V4SImode:
7141     case V16QImode:
7142     case V8HImode:
7143     case V2DFmode:
7144     case V2DImode:
7145       classes[0] = X86_64_SSE_CLASS;
7146       classes[1] = X86_64_SSEUP_CLASS;
7147       return 2;
7148     case V1TImode:
7149     case V1DImode:
7150     case V2SFmode:
7151     case V2SImode:
7152     case V4HImode:
7153     case V8QImode:
7154       classes[0] = X86_64_SSE_CLASS;
7155       return 1;
7156     case BLKmode:
7157     case VOIDmode:
7158       return 0;
7159     default:
7160       gcc_assert (VECTOR_MODE_P (mode));
7161 
7162       if (bytes > 16)
7163 	return 0;
7164 
7165       gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7166 
7167       if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7168 	classes[0] = X86_64_INTEGERSI_CLASS;
7169       else
7170 	classes[0] = X86_64_INTEGER_CLASS;
7171       classes[1] = X86_64_INTEGER_CLASS;
7172       return 1 + (bytes > 8);
7173     }
7174 }
7175 
/* Examine the argument and set the number of registers required in each
   class.  Return true iff the parameter should be passed in memory.  */
7178 
7179 static bool
7180 examine_argument (machine_mode mode, const_tree type, int in_return,
7181 		  int *int_nregs, int *sse_nregs)
7182 {
7183   enum x86_64_reg_class regclass[MAX_CLASSES];
7184   int n = classify_argument (mode, type, regclass, 0);
7185 
7186   *int_nregs = 0;
7187   *sse_nregs = 0;
7188 
7189   if (!n)
7190     return true;
7191   for (n--; n >= 0; n--)
7192     switch (regclass[n])
7193       {
7194       case X86_64_INTEGER_CLASS:
7195       case X86_64_INTEGERSI_CLASS:
7196 	(*int_nregs)++;
7197 	break;
7198       case X86_64_SSE_CLASS:
7199       case X86_64_SSESF_CLASS:
7200       case X86_64_SSEDF_CLASS:
7201 	(*sse_nregs)++;
7202 	break;
7203       case X86_64_NO_CLASS:
7204       case X86_64_SSEUP_CLASS:
7205 	break;
7206       case X86_64_X87_CLASS:
7207       case X86_64_X87UP_CLASS:
7208       case X86_64_COMPLEX_X87_CLASS:
7209 	if (!in_return)
7210 	  return true;
7211 	break;
7212       case X86_64_MEMORY_CLASS:
7213 	gcc_unreachable ();
7214       }
7215 
7216   return false;
7217 }
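
/* Illustrative sketch (not part of the compiler proper): under the
   SysV x86-64 classification above, a 16-byte aggregate such as

     struct s { double d; int i; };

   classifies as { X86_64_SSEDF_CLASS, X86_64_INTEGERSI_CLASS }, so

     int int_nregs, sse_nregs;
     bool in_mem = examine_argument (BLKmode, s_type, 0,
				     &int_nregs, &sse_nregs);

   (with s_type a hypothetical tree for struct s) is expected to leave
   in_mem false, int_nregs == 1 and sse_nregs == 1: one SSE and one
   integer register.  */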
7218 
7219 /* Construct container for the argument used by GCC interface.  See
7220    FUNCTION_ARG for the detailed description.  */
7221 
7222 static rtx
7223 construct_container (machine_mode mode, machine_mode orig_mode,
7224 		     const_tree type, int in_return, int nintregs, int nsseregs,
7225 		     const int *intreg, int sse_regno)
7226 {
  /* Static flags ensuring each of the following errors is issued at
     most once per compilation.  */
7228   static bool issued_sse_arg_error;
7229   static bool issued_sse_ret_error;
7230   static bool issued_x87_ret_error;
7231 
7232   machine_mode tmpmode;
7233   int bytes =
7234     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7235   enum x86_64_reg_class regclass[MAX_CLASSES];
7236   int n;
7237   int i;
7238   int nexps = 0;
7239   int needed_sseregs, needed_intregs;
7240   rtx exp[MAX_CLASSES];
7241   rtx ret;
7242 
7243   n = classify_argument (mode, type, regclass, 0);
7244   if (!n)
7245     return NULL;
7246   if (examine_argument (mode, type, in_return, &needed_intregs,
7247 			&needed_sseregs))
7248     return NULL;
7249   if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7250     return NULL;
7251 
7252   /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
7253      some less clueful developer tries to use floating-point anyway.  */
7254   if (needed_sseregs && !TARGET_SSE)
7255     {
7256       if (in_return)
7257 	{
7258 	  if (!issued_sse_ret_error)
7259 	    {
7260 	      error ("SSE register return with SSE disabled");
7261 	      issued_sse_ret_error = true;
7262 	    }
7263 	}
7264       else if (!issued_sse_arg_error)
7265 	{
7266 	  error ("SSE register argument with SSE disabled");
7267 	  issued_sse_arg_error = true;
7268 	}
7269       return NULL;
7270     }
7271 
7272   /* Likewise, error if the ABI requires us to return values in the
7273      x87 registers and the user specified -mno-80387.  */
7274   if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7275     for (i = 0; i < n; i++)
7276       if (regclass[i] == X86_64_X87_CLASS
7277 	  || regclass[i] == X86_64_X87UP_CLASS
7278 	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7279 	{
7280 	  if (!issued_x87_ret_error)
7281 	    {
7282 	      error ("x87 register return with x87 disabled");
7283 	      issued_x87_ret_error = true;
7284 	    }
7285 	  return NULL;
7286 	}
7287 
7288   /* First construct simple cases.  Avoid SCmode, since we want to use
     a single register to pass this type.  */
7290   if (n == 1 && mode != SCmode)
7291     switch (regclass[0])
7292       {
7293       case X86_64_INTEGER_CLASS:
7294       case X86_64_INTEGERSI_CLASS:
7295 	return gen_rtx_REG (mode, intreg[0]);
7296       case X86_64_SSE_CLASS:
7297       case X86_64_SSESF_CLASS:
7298       case X86_64_SSEDF_CLASS:
7299 	if (mode != BLKmode)
7300 	  return gen_reg_or_parallel (mode, orig_mode,
7301 				      SSE_REGNO (sse_regno));
7302 	break;
7303       case X86_64_X87_CLASS:
7304       case X86_64_COMPLEX_X87_CLASS:
7305 	return gen_rtx_REG (mode, FIRST_STACK_REG);
7306       case X86_64_NO_CLASS:
7307 	/* Zero sized array, struct or class.  */
7308 	return NULL;
7309       default:
7310 	gcc_unreachable ();
7311       }
7312   if (n == 2
7313       && regclass[0] == X86_64_SSE_CLASS
7314       && regclass[1] == X86_64_SSEUP_CLASS
7315       && mode != BLKmode)
7316     return gen_reg_or_parallel (mode, orig_mode,
7317 				SSE_REGNO (sse_regno));
7318   if (n == 4
7319       && regclass[0] == X86_64_SSE_CLASS
7320       && regclass[1] == X86_64_SSEUP_CLASS
7321       && regclass[2] == X86_64_SSEUP_CLASS
7322       && regclass[3] == X86_64_SSEUP_CLASS
7323       && mode != BLKmode)
7324     return gen_reg_or_parallel (mode, orig_mode,
7325 				SSE_REGNO (sse_regno));
7326   if (n == 8
7327       && regclass[0] == X86_64_SSE_CLASS
7328       && regclass[1] == X86_64_SSEUP_CLASS
7329       && regclass[2] == X86_64_SSEUP_CLASS
7330       && regclass[3] == X86_64_SSEUP_CLASS
7331       && regclass[4] == X86_64_SSEUP_CLASS
7332       && regclass[5] == X86_64_SSEUP_CLASS
7333       && regclass[6] == X86_64_SSEUP_CLASS
7334       && regclass[7] == X86_64_SSEUP_CLASS
7335       && mode != BLKmode)
7336     return gen_reg_or_parallel (mode, orig_mode,
7337 				SSE_REGNO (sse_regno));
7338   if (n == 2
7339       && regclass[0] == X86_64_X87_CLASS
7340       && regclass[1] == X86_64_X87UP_CLASS)
7341     return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7342 
7343   if (n == 2
7344       && regclass[0] == X86_64_INTEGER_CLASS
7345       && regclass[1] == X86_64_INTEGER_CLASS
7346       && (mode == CDImode || mode == TImode)
7347       && intreg[0] + 1 == intreg[1])
7348     return gen_rtx_REG (mode, intreg[0]);
7349 
7350   /* Otherwise figure out the entries of the PARALLEL.  */
7351   for (i = 0; i < n; i++)
7352     {
7353       int pos;
7354 
7355       switch (regclass[i])
7356         {
7357 	  case X86_64_NO_CLASS:
7358 	    break;
7359 	  case X86_64_INTEGER_CLASS:
7360 	  case X86_64_INTEGERSI_CLASS:
7361 	    /* Merge TImodes on aligned occasions here too.  */
7362 	    if (i * 8 + 8 > bytes)
7363 	      tmpmode
7364 		= mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7365 	    else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7366 	      tmpmode = SImode;
7367 	    else
7368 	      tmpmode = DImode;
	    /* We've requested a size (e.g. 24 bits) for which no
	       integer mode exists.  Use DImode.  */
7371 	    if (tmpmode == BLKmode)
7372 	      tmpmode = DImode;
7373 	    exp [nexps++]
7374 	      = gen_rtx_EXPR_LIST (VOIDmode,
7375 				   gen_rtx_REG (tmpmode, *intreg),
7376 				   GEN_INT (i*8));
7377 	    intreg++;
7378 	    break;
7379 	  case X86_64_SSESF_CLASS:
7380 	    exp [nexps++]
7381 	      = gen_rtx_EXPR_LIST (VOIDmode,
7382 				   gen_rtx_REG (SFmode,
7383 						SSE_REGNO (sse_regno)),
7384 				   GEN_INT (i*8));
7385 	    sse_regno++;
7386 	    break;
7387 	  case X86_64_SSEDF_CLASS:
7388 	    exp [nexps++]
7389 	      = gen_rtx_EXPR_LIST (VOIDmode,
7390 				   gen_rtx_REG (DFmode,
7391 						SSE_REGNO (sse_regno)),
7392 				   GEN_INT (i*8));
7393 	    sse_regno++;
7394 	    break;
7395 	  case X86_64_SSE_CLASS:
7396 	    pos = i;
7397 	    switch (n)
7398 	      {
7399 	      case 1:
7400 		tmpmode = DImode;
7401 		break;
7402 	      case 2:
7403 		if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7404 		  {
7405 		    tmpmode = TImode;
7406 		    i++;
7407 		  }
7408 		else
7409 		  tmpmode = DImode;
7410 		break;
7411 	      case 4:
7412 		gcc_assert (i == 0
7413 			    && regclass[1] == X86_64_SSEUP_CLASS
7414 			    && regclass[2] == X86_64_SSEUP_CLASS
7415 			    && regclass[3] == X86_64_SSEUP_CLASS);
7416 		tmpmode = OImode;
7417 		i += 3;
7418 		break;
7419 	      case 8:
7420 		gcc_assert (i == 0
7421 			    && regclass[1] == X86_64_SSEUP_CLASS
7422 			    && regclass[2] == X86_64_SSEUP_CLASS
7423 			    && regclass[3] == X86_64_SSEUP_CLASS
7424 			    && regclass[4] == X86_64_SSEUP_CLASS
7425 			    && regclass[5] == X86_64_SSEUP_CLASS
7426 			    && regclass[6] == X86_64_SSEUP_CLASS
7427 			    && regclass[7] == X86_64_SSEUP_CLASS);
7428 		tmpmode = XImode;
7429 		i += 7;
7430 		break;
7431 	      default:
7432 		gcc_unreachable ();
7433 	      }
7434 	    exp [nexps++]
7435 	      = gen_rtx_EXPR_LIST (VOIDmode,
7436 				   gen_rtx_REG (tmpmode,
7437 						SSE_REGNO (sse_regno)),
7438 				   GEN_INT (pos*8));
7439 	    sse_regno++;
7440 	    break;
7441 	  default:
7442 	    gcc_unreachable ();
7443 	}
7444     }
7445 
7446   /* Empty aligned struct, union or class.  */
7447   if (nexps == 0)
7448     return NULL;
7449 
  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7451   for (i = 0; i < nexps; i++)
7452     XVECEXP (ret, 0, i) = exp [i];
7453   return ret;
7454 }
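
/* Illustrative sketch: for the struct { double d; int i; } example
   from examine_argument above, passed as an argument starting at the
   first integer and SSE registers, the PARALLEL built here would look
   roughly like

     (parallel:BLK [(expr_list (reg:DF xmm0) (const_int 0))
		    (expr_list (reg:SI di) (const_int 8))])

   i.e. the double travels in %xmm0 and the int in %edi, at byte
   offsets 0 and 8 of the aggregate.  */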
7455 
7456 /* Update the data in CUM to advance over an argument of mode MODE
7457    and data type TYPE.  (TYPE is null for libcalls where that information
7458    may not be available.)
7459 
   Return the number of integer registers advanced over.  */
7461 
7462 static int
7463 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7464 			 const_tree type, HOST_WIDE_INT bytes,
7465 			 HOST_WIDE_INT words)
7466 {
7467   int res = 0;
  bool error_p = false;
7469 
7470   switch (mode)
7471     {
7472     default:
7473       break;
7474 
7475     case BLKmode:
7476       if (bytes < 0)
7477 	break;
7478       /* FALLTHRU */
7479 
7480     case DImode:
7481     case SImode:
7482     case HImode:
7483     case QImode:
7484       cum->words += words;
7485       cum->nregs -= words;
7486       cum->regno += words;
7487       if (cum->nregs >= 0)
7488 	res = words;
7489       if (cum->nregs <= 0)
7490 	{
7491 	  cum->nregs = 0;
7492 	  cum->regno = 0;
7493 	}
7494       break;
7495 
7496     case OImode:
7497       /* OImode shouldn't be used directly.  */
7498       gcc_unreachable ();
7499 
    case DFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 2)
	break;
      /* FALLTHRU */
    case SFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 1)
	break;
7510       /* FALLTHRU */
7511 
7512     case V8SFmode:
7513     case V8SImode:
7514     case V64QImode:
7515     case V32HImode:
7516     case V16SImode:
7517     case V8DImode:
7518     case V16SFmode:
7519     case V8DFmode:
7520     case V32QImode:
7521     case V16HImode:
7522     case V4DFmode:
7523     case V4DImode:
7524     case TImode:
7525     case V16QImode:
7526     case V8HImode:
7527     case V4SImode:
7528     case V2DImode:
7529     case V4SFmode:
7530     case V2DFmode:
7531       if (!type || !AGGREGATE_TYPE_P (type))
7532 	{
7533 	  cum->sse_words += words;
7534 	  cum->sse_nregs -= 1;
7535 	  cum->sse_regno += 1;
7536 	  if (cum->sse_nregs <= 0)
7537 	    {
7538 	      cum->sse_nregs = 0;
7539 	      cum->sse_regno = 0;
7540 	    }
7541 	}
7542       break;
7543 
7544     case V8QImode:
7545     case V4HImode:
7546     case V2SImode:
7547     case V2SFmode:
7548     case V1TImode:
7549     case V1DImode:
7550       if (!type || !AGGREGATE_TYPE_P (type))
7551 	{
7552 	  cum->mmx_words += words;
7553 	  cum->mmx_nregs -= 1;
7554 	  cum->mmx_regno += 1;
7555 	  if (cum->mmx_nregs <= 0)
7556 	    {
7557 	      cum->mmx_nregs = 0;
7558 	      cum->mmx_regno = 0;
7559 	    }
7560 	}
7561       break;
7562     }
7563   if (error_p)
7564     {
7565       cum->float_in_sse = 0;
7566       error ("calling %qD with SSE calling convention without "
7567 	     "SSE/SSE2 enabled", cum->decl);
7568       sorry ("this is a GCC bug that can be worked around by adding "
7569 	     "attribute used to function called");
7570     }
7571 
7572   return res;
7573 }
7574 
7575 static int
7576 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7577 			 const_tree type, HOST_WIDE_INT words, bool named)
7578 {
7579   int int_nregs, sse_nregs;
7580 
  /* Unnamed 256- and 512-bit vector mode parameters are passed on
     the stack.  */
7582   if (!named && (VALID_AVX512F_REG_MODE (mode)
7583 		 || VALID_AVX256_REG_MODE (mode)))
7584     return 0;
7585 
7586   if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7587       && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7588     {
7589       cum->nregs -= int_nregs;
7590       cum->sse_nregs -= sse_nregs;
7591       cum->regno += int_nregs;
7592       cum->sse_regno += sse_nregs;
7593       return int_nregs;
7594     }
7595   else
7596     {
7597       int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7598       cum->words = (cum->words + align - 1) & ~(align - 1);
7599       cum->words += words;
7600       return 0;
7601     }
7602 }
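
/* Sketch: advancing over  void f (long a, double x)  consumes one
   integer and one SSE register (cum->regno and cum->sse_regno each
   move by one), while an unnamed 256- or 512-bit vector argument is
   caught by the early return above and advances nothing: it lives
   on the stack.  */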
7603 
7604 static int
7605 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7606 			    HOST_WIDE_INT words)
7607 {
  /* Anything other than 1, 2, 4 or 8 bytes is passed indirectly,
     so only those sizes can reach this point.  */
7609   gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7610 
7611   cum->words += words;
7612   if (cum->nregs > 0)
7613     {
7614       cum->nregs -= 1;
7615       cum->regno += 1;
7616       return 1;
7617     }
7618   return 0;
7619 }
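
/* Sketch: the MS ABI is purely positional, so every named argument,
   integer or floating, burns exactly one of the four register slots;
   for a hypothetical

     void f (int a, double b, int c, double d);

   the arguments occupy RCX, XMM1, R8 and XMM3 respectively.  */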
7620 
7621 /* Update the data in CUM to advance over an argument of mode MODE and
7622    data type TYPE.  (TYPE is null for libcalls where that information
7623    may not be available.)  */
7624 
7625 static void
7626 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7627 			   const_tree type, bool named)
7628 {
7629   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7630   HOST_WIDE_INT bytes, words;
7631   int nregs;
7632 
7633   if (mode == BLKmode)
7634     bytes = int_size_in_bytes (type);
7635   else
7636     bytes = GET_MODE_SIZE (mode);
7637   words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7638 
7639   if (type)
7640     mode = type_natural_mode (type, NULL, false);
7641 
7642   if ((type && POINTER_BOUNDS_TYPE_P (type))
7643       || POINTER_BOUNDS_MODE_P (mode))
7644     {
      /* If we pass bounds in the Bounds Table then just update the
	 remaining bounds count.  */
7646       if (cum->bnds_in_bt)
7647 	{
7648 	  cum->bnds_in_bt--;
7649 	  return;
7650 	}
7651 
      /* Update the remaining number of bounds to force.  */
7653       if (cum->force_bnd_pass)
7654 	cum->force_bnd_pass--;
7655 
7656       cum->bnd_regno++;
7657 
7658       return;
7659     }
7660 
7661   /* The first arg not going to Bounds Tables resets this counter.  */
7662   cum->bnds_in_bt = 0;
  /* For unnamed args we always pass bounds to avoid a bounds mismatch
     when the passed and received types do not match.  If bounds do not
     follow an unnamed arg, still pretend the required number of bounds
     was passed.  */
7666   if (cum->force_bnd_pass)
7667     {
7668       cum->bnd_regno += cum->force_bnd_pass;
7669       cum->force_bnd_pass = 0;
7670     }
7671 
7672   if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7673     nregs = function_arg_advance_ms_64 (cum, bytes, words);
7674   else if (TARGET_64BIT)
7675     nregs = function_arg_advance_64 (cum, mode, type, words, named);
7676   else
7677     nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7678 
7679   /* For stdarg we expect bounds to be passed for each value passed
     in a register.  */
7681   if (cum->stdarg)
7682     cum->force_bnd_pass = nregs;
7683   /* For pointers passed in memory we expect bounds passed in Bounds
7684      Table.  */
7685   if (!nregs)
7686     cum->bnds_in_bt = chkp_type_bounds_count (type);
7687 }
7688 
7689 /* Define where to put the arguments to a function.
7690    Value is zero to push the argument on the stack,
7691    or a hard register in which to store the argument.
7692 
7693    MODE is the argument's machine mode.
7694    TYPE is the data type of the argument (as a tree).
7695     This is null for libcalls where that information may
7696     not be available.
7697    CUM is a variable of type CUMULATIVE_ARGS which gives info about
7698     the preceding args and about the function being called.
7699    NAMED is nonzero if this argument is a named parameter
7700     (otherwise it is an extra parameter matching an ellipsis).  */
7701 
7702 static rtx
7703 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7704 		 machine_mode orig_mode, const_tree type,
7705 		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7706 {
7707   bool error_p = false;
7708   /* Avoid the AL settings for the Unix64 ABI.  */
7709   if (mode == VOIDmode)
7710     return constm1_rtx;
7711 
7712   switch (mode)
7713     {
7714     default:
7715       break;
7716 
7717     case BLKmode:
7718       if (bytes < 0)
7719 	break;
7720       /* FALLTHRU */
7721     case DImode:
7722     case SImode:
7723     case HImode:
7724     case QImode:
7725       if (words <= cum->nregs)
7726 	{
7727 	  int regno = cum->regno;
7728 
	  /* Fastcall allocates the first two DWORD (SImode) or
	     smaller arguments to ECX and EDX if the argument is
	     not an aggregate type.  */
7732 	  if (cum->fastcall)
7733 	    {
7734 	      if (mode == BLKmode
7735 		  || mode == DImode
7736 		  || (type && AGGREGATE_TYPE_P (type)))
7737 	        break;
7738 
	      /* ECX, not EAX, is the first allocated register.  */
7740 	      if (regno == AX_REG)
7741 		regno = CX_REG;
7742 	    }
7743 	  return gen_rtx_REG (mode, regno);
7744 	}
7745       break;
7746 
    case DFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 2)
	break;
      /* FALLTHRU */
    case SFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 1)
	break;
7757       /* FALLTHRU */
7758     case TImode:
7759       /* In 32bit, we pass TImode in xmm registers.  */
7760     case V16QImode:
7761     case V8HImode:
7762     case V4SImode:
7763     case V2DImode:
7764     case V4SFmode:
7765     case V2DFmode:
7766       if (!type || !AGGREGATE_TYPE_P (type))
7767 	{
7768 	  if (cum->sse_nregs)
7769 	    return gen_reg_or_parallel (mode, orig_mode,
7770 				        cum->sse_regno + FIRST_SSE_REG);
7771 	}
7772       break;
7773 
7774     case OImode:
7775     case XImode:
7776       /* OImode and XImode shouldn't be used directly.  */
7777       gcc_unreachable ();
7778 
7779     case V64QImode:
7780     case V32HImode:
7781     case V16SImode:
7782     case V8DImode:
7783     case V16SFmode:
7784     case V8DFmode:
7785     case V8SFmode:
7786     case V8SImode:
7787     case V32QImode:
7788     case V16HImode:
7789     case V4DFmode:
7790     case V4DImode:
7791       if (!type || !AGGREGATE_TYPE_P (type))
7792 	{
7793 	  if (cum->sse_nregs)
7794 	    return gen_reg_or_parallel (mode, orig_mode,
7795 				        cum->sse_regno + FIRST_SSE_REG);
7796 	}
7797       break;
7798 
7799     case V8QImode:
7800     case V4HImode:
7801     case V2SImode:
7802     case V2SFmode:
7803     case V1TImode:
7804     case V1DImode:
7805       if (!type || !AGGREGATE_TYPE_P (type))
7806 	{
7807 	  if (cum->mmx_nregs)
7808 	    return gen_reg_or_parallel (mode, orig_mode,
7809 				        cum->mmx_regno + FIRST_MMX_REG);
7810 	}
7811       break;
7812     }
7813   if (error_p)
7814     {
7815       cum->float_in_sse = 0;
7816       error ("calling %qD with SSE calling convention without "
7817 	     "SSE/SSE2 enabled", cum->decl);
7818       sorry ("this is a GCC bug that can be worked around by adding "
7819 	     "attribute used to function called");
7820     }
7821 
7822   return NULL_RTX;
7823 }
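
/* Sketch: with the 32-bit fastcall convention handled above, for a
   hypothetical

     void __attribute__((fastcall)) f (int a, int b, int c);

   a is passed in ECX, b in EDX and c on the stack; the DImode and
   aggregate checks keep 8-byte and struct arguments out of the
   registers entirely.  */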
7824 
7825 static rtx
7826 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7827 		 machine_mode orig_mode, const_tree type, bool named)
7828 {
7829   /* Handle a hidden AL argument containing number of registers
7830      for varargs x86-64 functions.  */
7831   if (mode == VOIDmode)
7832     return GEN_INT (cum->maybe_vaarg
7833 		    ? (cum->sse_nregs < 0
7834 		       ? X86_64_SSE_REGPARM_MAX
7835 		       : cum->sse_regno)
7836 		    : -1);
7837 
7838   switch (mode)
7839     {
7840     default:
7841       break;
7842 
7843     case V8SFmode:
7844     case V8SImode:
7845     case V32QImode:
7846     case V16HImode:
7847     case V4DFmode:
7848     case V4DImode:
7849     case V16SFmode:
7850     case V16SImode:
7851     case V64QImode:
7852     case V32HImode:
7853     case V8DFmode:
7854     case V8DImode:
      /* Unnamed 256- and 512-bit vector mode parameters are passed on
	 the stack.  */
7856       if (!named)
7857 	return NULL;
7858       break;
7859     }
7860 
7861   return construct_container (mode, orig_mode, type, 0, cum->nregs,
7862 			      cum->sse_nregs,
7863 			      &x86_64_int_parameter_registers [cum->regno],
7864 			      cum->sse_regno);
7865 }
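
/* Sketch: for a SysV x86-64 prototype such as

     void f (long a, double x);

   this is expected to yield (reg:DI di) for a and, via
   construct_container, (reg:DF xmm0) for x; the VOIDmode query above
   additionally produces the hidden AL value giving a varargs callee
   the number of SSE registers actually used.  */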
7866 
7867 static rtx
7868 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7869 		    machine_mode orig_mode, bool named,
7870 		    HOST_WIDE_INT bytes)
7871 {
7872   unsigned int regno;
7873 
  /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use the value -2 to specify that the current function call is
     MS_ABI.  */
7876   if (mode == VOIDmode)
7877     return GEN_INT (-2);
7878 
7879   /* If we've run out of registers, it goes on the stack.  */
7880   if (cum->nregs == 0)
7881     return NULL_RTX;
7882 
7883   regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7884 
7885   /* Only floating point modes are passed in anything but integer regs.  */
7886   if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7887     {
7888       if (named)
7889 	regno = cum->regno + FIRST_SSE_REG;
7890       else
7891 	{
7892 	  rtx t1, t2;
7893 
7894 	  /* Unnamed floating parameters are passed in both the
7895 	     SSE and integer registers.  */
7896 	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7897 	  t2 = gen_rtx_REG (mode, regno);
7898 	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7899 	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7900 	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7901 	}
7902     }
7903   /* Handle aggregated types passed in register.  */
7904   if (orig_mode == BLKmode)
7905     {
7906       if (bytes > 0 && bytes <= 8)
7907         mode = (bytes > 4 ? DImode : SImode);
7908       if (mode == BLKmode)
7909         mode = DImode;
7910     }
7911 
7912   return gen_reg_or_parallel (mode, orig_mode, regno);
7913 }
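
/* Sketch: for an unnamed double in the second MS ABI slot the code
   above builds roughly

     (parallel:DF [(expr_list (reg:DF xmm1) (const_int 0))
		   (expr_list (reg:DF dx) (const_int 0))])

   so the value is made available in both %xmm1 and %rdx, as required
   for floating varargs under that ABI.  */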
7914 
7915 /* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register
   in which to store the argument.
7917 
7918    MODE is the argument's machine mode.  TYPE is the data type of the
7919    argument.  It is null for libcalls where that information may not be
7920    available.  CUM gives information about the preceding args and about
7921    the function being called.  NAMED is nonzero if this argument is a
7922    named parameter (otherwise it is an extra parameter matching an
7923    ellipsis).  */
7924 
7925 static rtx
7926 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7927 		   const_tree type, bool named)
7928 {
7929   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7930   machine_mode mode = omode;
7931   HOST_WIDE_INT bytes, words;
7932   rtx arg;
7933 
  /* All pointer bounds arguments are handled separately here.  */
7935   if ((type && POINTER_BOUNDS_TYPE_P (type))
7936       || POINTER_BOUNDS_MODE_P (mode))
7937     {
7938       /* Return NULL if bounds are forced to go in Bounds Table.  */
7939       if (cum->bnds_in_bt)
7940 	arg = NULL;
7941       /* Return the next available bound reg if any.  */
7942       else if (cum->bnd_regno <= LAST_BND_REG)
7943 	arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7944       /* Return the next special slot number otherwise.  */
7945       else
7946 	arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7947 
7948       return arg;
7949     }
7950 
7951   if (mode == BLKmode)
7952     bytes = int_size_in_bytes (type);
7953   else
7954     bytes = GET_MODE_SIZE (mode);
7955   words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7956 
7957   /* To simplify the code below, represent vector types with a vector mode
7958      even if MMX/SSE are not active.  */
7959   if (type && TREE_CODE (type) == VECTOR_TYPE)
7960     mode = type_natural_mode (type, cum, false);
7961 
7962   if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7963     arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7964   else if (TARGET_64BIT)
7965     arg = function_arg_64 (cum, mode, omode, type, named);
7966   else
7967     arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7968 
7969   return arg;
7970 }
7971 
7972 /* A C expression that indicates when an argument must be passed by
7973    reference.  If nonzero for an argument, a copy of that argument is
7974    made in memory and a pointer to the argument is passed instead of
7975    the argument itself.  The pointer is passed in whatever way is
7976    appropriate for passing a pointer to that type.  */
7977 
7978 static bool
7979 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7980 			const_tree type, bool)
7981 {
7982   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7983 
7984   /* Bounds are never passed by reference.  */
7985   if ((type && POINTER_BOUNDS_TYPE_P (type))
7986       || POINTER_BOUNDS_MODE_P (mode))
7987     return false;
7988 
7989   /* See Windows x64 Software Convention.  */
7990   if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7991     {
7992       int msize = (int) GET_MODE_SIZE (mode);
7993       if (type)
7994 	{
7995 	  /* Arrays are passed by reference.  */
7996 	  if (TREE_CODE (type) == ARRAY_TYPE)
7997 	    return true;
7998 
7999 	  if (AGGREGATE_TYPE_P (type))
8000 	    {
8001 	      /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
8002 	         are passed by reference.  */
8003 	      msize = int_size_in_bytes (type);
8004 	    }
8005 	}
8006 
      /* Anything that does not match the 1, 2, 4 or 8 byte sizes
	 (__m128 included) is passed by reference.  */
      switch (msize)
	{
	case 1: case 2: case 4: case 8:
	  break;
	default:
	  return true;
	}
8014     }
8015   else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
    return true;
8017 
  return false;
8019 }
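
/* Sketch of the decisions above for a hypothetical set of types:

     MS ABI:   struct { char c[3]; }  -- 3 bytes, by reference
	       struct { int a, b; }   -- 8 bytes, in a register
	       __m128		      -- 16 bytes, by reference

   On SysV x86-64 only variably-sized types (int_size_in_bytes == -1)
   take the by-reference path here; everything else is handled by the
   register/memory classification instead.  */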
8020 
8021 /* Return true when TYPE should be 128bit aligned for 32bit argument
8022    passing ABI.  XXX: This function is obsolete and is only used for
8023    checking psABI compatibility with previous versions of GCC.  */
8024 
8025 static bool
8026 ix86_compat_aligned_value_p (const_tree type)
8027 {
8028   machine_mode mode = TYPE_MODE (type);
8029   if (((TARGET_SSE && SSE_REG_MODE_P (mode))
8030        || mode == TDmode
8031        || mode == TFmode
8032        || mode == TCmode)
8033       && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
8034     return true;
8035   if (TYPE_ALIGN (type) < 128)
8036     return false;
8037 
8038   if (AGGREGATE_TYPE_P (type))
8039     {
8040       /* Walk the aggregates recursively.  */
8041       switch (TREE_CODE (type))
8042 	{
8043 	case RECORD_TYPE:
8044 	case UNION_TYPE:
8045 	case QUAL_UNION_TYPE:
8046 	  {
8047 	    tree field;
8048 
8049 	    /* Walk all the structure fields.  */
8050 	    for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8051 	      {
8052 		if (TREE_CODE (field) == FIELD_DECL
8053 		    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
8054 		  return true;
8055 	      }
8056 	    break;
8057 	  }
8058 
8059 	case ARRAY_TYPE:
	  /* Just for use if some language passes arrays by value.  */
8061 	  if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
8062 	    return true;
8063 	  break;
8064 
8065 	default:
8066 	  gcc_unreachable ();
8067 	}
8068     }
8069   return false;
8070 }
8071 
8072 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
8073    XXX: This function is obsolete and is only used for checking psABI
8074    compatibility with previous versions of GCC.  */
8075 
8076 static unsigned int
8077 ix86_compat_function_arg_boundary (machine_mode mode,
8078 				   const_tree type, unsigned int align)
8079 {
8080   /* In 32bit, only _Decimal128 and __float128 are aligned to their
8081      natural boundaries.  */
8082   if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
8083     {
8084       /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
8085 	 make an exception for SSE modes since these require 128bit
8086 	 alignment.
8087 
8088 	 The handling here differs from field_alignment.  ICC aligns MMX
8089 	 arguments to 4 byte boundaries, while structure fields are aligned
8090 	 to 8 byte boundaries.  */
8091       if (!type)
8092 	{
8093 	  if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
8094 	    align = PARM_BOUNDARY;
8095 	}
8096       else
8097 	{
8098 	  if (!ix86_compat_aligned_value_p (type))
8099 	    align = PARM_BOUNDARY;
8100 	}
8101     }
8102   if (align > BIGGEST_ALIGNMENT)
8103     align = BIGGEST_ALIGNMENT;
8104   return align;
8105 }
8106 
8107 /* Return true when TYPE should be 128bit aligned for 32bit argument
8108    passing ABI.  */
8109 
8110 static bool
8111 ix86_contains_aligned_value_p (const_tree type)
8112 {
8113   machine_mode mode = TYPE_MODE (type);
8114 
8115   if (mode == XFmode || mode == XCmode)
8116     return false;
8117 
8118   if (TYPE_ALIGN (type) < 128)
8119     return false;
8120 
8121   if (AGGREGATE_TYPE_P (type))
8122     {
8123       /* Walk the aggregates recursively.  */
8124       switch (TREE_CODE (type))
8125 	{
8126 	case RECORD_TYPE:
8127 	case UNION_TYPE:
8128 	case QUAL_UNION_TYPE:
8129 	  {
8130 	    tree field;
8131 
8132 	    /* Walk all the structure fields.  */
8133 	    for (field = TYPE_FIELDS (type);
8134 		 field;
8135 		 field = DECL_CHAIN (field))
8136 	      {
8137 		if (TREE_CODE (field) == FIELD_DECL
8138 		    && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8139 		  return true;
8140 	      }
8141 	    break;
8142 	  }
8143 
8144 	case ARRAY_TYPE:
	  /* Just for use if some language passes arrays by value.  */
8146 	  if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
8147 	    return true;
8148 	  break;
8149 
8150 	default:
8151 	  gcc_unreachable ();
8152 	}
8153     }
8154   else
8155     return TYPE_ALIGN (type) >= 128;
8156 
8157   return false;
8158 }
8159 
8160 /* Gives the alignment boundary, in bits, of an argument with the
8161    specified mode and type.  */
8162 
8163 static unsigned int
8164 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8165 {
8166   unsigned int align;
8167   if (type)
8168     {
      /* Since the main variant type is what is used for the call,
	 convert TYPE to its main variant.  */
8171       type = TYPE_MAIN_VARIANT (type);
8172       align = TYPE_ALIGN (type);
8173     }
8174   else
8175     align = GET_MODE_ALIGNMENT (mode);
8176   if (align < PARM_BOUNDARY)
8177     align = PARM_BOUNDARY;
8178   else
8179     {
8180       static bool warned;
8181       unsigned int saved_align = align;
8182 
8183       if (!TARGET_64BIT)
8184 	{
8185 	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
8186 	  if (!type)
8187 	    {
8188 	      if (mode == XFmode || mode == XCmode)
8189 		align = PARM_BOUNDARY;
8190 	    }
8191 	  else if (!ix86_contains_aligned_value_p (type))
8192 	    align = PARM_BOUNDARY;
8193 
8194 	  if (align < 128)
8195 	    align = PARM_BOUNDARY;
8196 	}
8197 
8198       if (warn_psabi
8199 	  && !warned
8200 	  && align != ix86_compat_function_arg_boundary (mode, type,
8201 							 saved_align))
8202 	{
8203 	  warned = true;
8204 	  inform (input_location,
8205 		  "The ABI for passing parameters with %d-byte"
8206 		  " alignment has changed in GCC 4.6",
8207 		  align / BITS_PER_UNIT);
8208 	}
8209     }
8210 
8211   return align;
8212 }
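
/* Sketch: in 32-bit code a plain double argument ends up at the 4-byte
   PARM_BOUNDARY while a 16-byte aligned vector keeps its alignment:

     void f (double d);   -- d aligned to 32 bits on the stack
     void g (__m128 v);   -- v aligned to 128 bits

   and the inform above fires once if the pre-GCC 4.6 computation
   would have chosen differently.  */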
8213 
8214 /* Return true if N is a possible register number of function value.  */
8215 
8216 static bool
8217 ix86_function_value_regno_p (const unsigned int regno)
8218 {
8219   switch (regno)
8220     {
8221     case AX_REG:
8222       return true;
8223     case DX_REG:
8224       return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
8225     case DI_REG:
8226     case SI_REG:
8227       return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
8228 
8229     case BND0_REG:
8230     case BND1_REG:
8231       return chkp_function_instrumented_p (current_function_decl);
8232 
8233       /* Complex values are returned in %st(0)/%st(1) pair.  */
8234     case ST0_REG:
8235     case ST1_REG:
      /* TODO: The function should depend on the current function ABI,
	 but builtins.c would need updating then.  Therefore we use the
	 default ABI.  */
8239       if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
8240 	return false;
8241       return TARGET_FLOAT_RETURNS_IN_80387;
8242 
8243       /* Complex values are returned in %xmm0/%xmm1 pair.  */
8244     case XMM0_REG:
8245     case XMM1_REG:
8246       return TARGET_SSE;
8247 
8248     case MM0_REG:
8249       if (TARGET_MACHO || TARGET_64BIT)
8250 	return false;
8251       return TARGET_MMX;
8252     }
8253 
8254   return false;
8255 }
8256 
8257 /* Define how to find the value returned by a function.
8258    VALTYPE is the data type of the value (as a tree).
8259    If the precise function being called is known, FUNC is its FUNCTION_DECL;
8260    otherwise, FUNC is 0.  */
8261 
8262 static rtx
8263 function_value_32 (machine_mode orig_mode, machine_mode mode,
8264 		   const_tree fntype, const_tree fn)
8265 {
8266   unsigned int regno;
8267 
  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when MMX is not available.  However,
     some ABIs may require the result to be returned like DImode.  */
8271   if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8272     regno = FIRST_MMX_REG;
8273 
  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when SSE is not available.  However, some ABIs
     may require the result to be returned like integer TImode.  */
8277   else if (mode == TImode
8278 	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8279     regno = FIRST_SSE_REG;
8280 
8281   /* 32-byte vector modes in %ymm0.   */
8282   else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8283     regno = FIRST_SSE_REG;
8284 
8285   /* 64-byte vector modes in %zmm0.   */
8286   else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8287     regno = FIRST_SSE_REG;
8288 
8289   /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
8290   else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8291     regno = FIRST_FLOAT_REG;
8292   else
8293     /* Most things go in %eax.  */
8294     regno = AX_REG;
8295 
8296   /* Override FP return register with %xmm0 for local functions when
8297      SSE math is enabled or for functions with sseregparm attribute.  */
8298   if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8299     {
8300       int sse_level = ix86_function_sseregparm (fntype, fn, false);
8301       if (sse_level == -1)
8302 	{
8303 	  error ("calling %qD with SSE caling convention without "
8304 		 "SSE/SSE2 enabled", fn);
8305 	  sorry ("this is a GCC bug that can be worked around by adding "
8306 		 "attribute used to function called");
8307 	}
8308       else if ((sse_level >= 1 && mode == SFmode)
8309 	       || (sse_level == 2 && mode == DFmode))
8310 	regno = FIRST_SSE_REG;
8311     }
8312 
8313   /* OImode shouldn't be used directly.  */
8314   gcc_assert (mode != OImode);
8315 
8316   return gen_rtx_REG (orig_mode, regno);
8317 }
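
/* Sketch of typical 32-bit return registers chosen above:

     long long f (void);  -- %eax/%edx pair (DImode in AX_REG)
     double g (void);     -- %st(0), unless sseregparm or local SSE
			     math reroutes it to %xmm0
     __m128 h (void);     -- %xmm0  */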
8318 
8319 static rtx
8320 function_value_64 (machine_mode orig_mode, machine_mode mode,
8321 		   const_tree valtype)
8322 {
8323   rtx ret;
8324 
8325   /* Handle libcalls, which don't provide a type node.  */
8326   if (valtype == NULL)
8327     {
8328       unsigned int regno;
8329 
8330       switch (mode)
8331 	{
8332 	case SFmode:
8333 	case SCmode:
8334 	case DFmode:
8335 	case DCmode:
8336 	case TFmode:
8337 	case SDmode:
8338 	case DDmode:
8339 	case TDmode:
8340 	  regno = FIRST_SSE_REG;
8341 	  break;
8342 	case XFmode:
8343 	case XCmode:
8344 	  regno = FIRST_FLOAT_REG;
8345 	  break;
8346 	case TCmode:
8347 	  return NULL;
8348 	default:
8349 	  regno = AX_REG;
8350 	}
8351 
8352       return gen_rtx_REG (mode, regno);
8353     }
8354   else if (POINTER_TYPE_P (valtype))
8355     {
8356       /* Pointers are always returned in word_mode.  */
8357       mode = word_mode;
8358     }
8359 
8360   ret = construct_container (mode, orig_mode, valtype, 1,
8361 			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8362 			     x86_64_int_return_registers, 0);
8363 
  /* For zero sized structures, construct_container returns NULL, but we
     need to keep the rest of the compiler happy by returning a
     meaningful value.  */
8366   if (!ret)
8367     ret = gen_rtx_REG (orig_mode, AX_REG);
8368 
8369   return ret;
8370 }
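
/* Sketch of typical SysV x86-64 return values produced here:

     long f (void);                  -- %rax
     double g (void);                -- %xmm0
     struct { long a, b; } h (void); -- PARALLEL over %rax and %rdx

   and a zero-sized aggregate falls back to the dummy %rax REG built
   above to keep the rest of the compiler happy.  */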
8371 
8372 static rtx
8373 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8374 		      const_tree valtype)
8375 {
8376   unsigned int regno = AX_REG;
8377 
8378   if (TARGET_SSE)
8379     {
8380       switch (GET_MODE_SIZE (mode))
8381 	{
8382 	case 16:
8383 	  if (valtype != NULL_TREE
8384 	      && !VECTOR_INTEGER_TYPE_P (valtype)
8385 	      && !VECTOR_INTEGER_TYPE_P (valtype)
8386 	      && !INTEGRAL_TYPE_P (valtype)
8387 	      && !VECTOR_FLOAT_TYPE_P (valtype))
8388 	    break;
8389 	  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8390 	      && !COMPLEX_MODE_P (mode))
8391 	    regno = FIRST_SSE_REG;
8392 	  break;
8393 	case 8:
8394 	case 4:
8395 	  if (mode == SFmode || mode == DFmode)
8396 	    regno = FIRST_SSE_REG;
8397 	  break;
8398 	default:
8399 	  break;
8400         }
8401     }
8402   return gen_rtx_REG (orig_mode, regno);
8403 }
8404 
8405 static rtx
8406 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8407 		       machine_mode orig_mode, machine_mode mode)
8408 {
8409   const_tree fn, fntype;
8410 
8411   fn = NULL_TREE;
8412   if (fntype_or_decl && DECL_P (fntype_or_decl))
8413     fn = fntype_or_decl;
8414   fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8415 
8416   if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8417       || POINTER_BOUNDS_MODE_P (mode))
8418     return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8419   else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8420     return function_value_ms_64 (orig_mode, mode, valtype);
8421   else if (TARGET_64BIT)
8422     return function_value_64 (orig_mode, mode, valtype);
8423   else
8424     return function_value_32 (orig_mode, mode, fntype, fn);
8425 }
8426 
8427 static rtx
8428 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8429 {
8430   machine_mode mode, orig_mode;
8431 
8432   orig_mode = TYPE_MODE (valtype);
8433   mode = type_natural_mode (valtype, NULL, true);
8434   return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8435 }
8436 
8437 /*  Return an RTX representing a place where a function returns
    or receives pointer bounds, or NULL if no bounds are returned.
8439 
8440     VALTYPE is a data type of a value returned by the function.
8441 
8442     FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8443     or FUNCTION_TYPE of the function.
8444 
8445     If OUTGOING is false, return a place in which the caller will
8446     see the return value.  Otherwise, return a place where a
8447     function returns a value.  */
8448 
8449 static rtx
8450 ix86_function_value_bounds (const_tree valtype,
8451 			    const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8452 			    bool outgoing ATTRIBUTE_UNUSED)
8453 {
8454   rtx res = NULL_RTX;
8455 
8456   if (BOUNDED_TYPE_P (valtype))
8457     res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8458   else if (chkp_type_has_pointer (valtype))
8459     {
8460       bitmap slots;
8461       rtx bounds[2];
8462       bitmap_iterator bi;
8463       unsigned i, bnd_no = 0;
8464 
8465       bitmap_obstack_initialize (NULL);
8466       slots = BITMAP_ALLOC (NULL);
8467       chkp_find_bound_slots (valtype, slots);
8468 
8469       EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8470 	{
8471 	  rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8472 	  rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8473 	  gcc_assert (bnd_no < 2);
8474 	  bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8475 	}
8476 
8477       res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8478 
8479       BITMAP_FREE (slots);
8480       bitmap_obstack_release (NULL);
8481     }
8482   else
8483     res = NULL_RTX;
8484 
8485   return res;
8486 }
8487 
8488 /* Pointer function arguments and return values are promoted to
8489    word_mode.  */
8490 
8491 static machine_mode
8492 ix86_promote_function_mode (const_tree type, machine_mode mode,
8493 			    int *punsignedp, const_tree fntype,
8494 			    int for_return)
8495 {
8496   if (type != NULL_TREE && POINTER_TYPE_P (type))
8497     {
8498       *punsignedp = POINTERS_EXTEND_UNSIGNED;
8499       return word_mode;
8500     }
8501   return default_promote_function_mode (type, mode, punsignedp, fntype,
8502 					for_return);
8503 }
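
/* Sketch: on x32 (Pmode == SImode, word_mode == DImode) a pointer
   parameter such as

     void f (char *p);

   is promoted here to DImode with POINTERS_EXTEND_UNSIGNED, i.e. it
   arrives zero-extended in the full 64-bit register; everything else
   defers to default_promote_function_mode.  */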
8504 
8505 /* Return true if a structure, union or array with MODE containing FIELD
8506    should be accessed using BLKmode.  */
8507 
8508 static bool
8509 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8510 {
8511   /* Union with XFmode must be in BLKmode.  */
8512   return (mode == XFmode
8513 	  && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8514 	      || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8515 }
8516 
8517 rtx
8518 ix86_libcall_value (machine_mode mode)
8519 {
8520   return ix86_function_value_1 (NULL, NULL, mode, mode);
8521 }
8522 
8523 /* Return true iff type is returned in memory.  */
8524 
8525 static bool
8526 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8527 {
8528 #ifdef SUBTARGET_RETURN_IN_MEMORY
8529   return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8530 #else
8531   const machine_mode mode = type_natural_mode (type, NULL, true);
8532   HOST_WIDE_INT size;
8533 
8534   if (POINTER_BOUNDS_TYPE_P (type))
8535     return false;
8536 
8537   if (TARGET_64BIT)
8538     {
8539       if (ix86_function_type_abi (fntype) == MS_ABI)
8540 	{
8541 	  size = int_size_in_bytes (type);
8542 
8543 	  /* __m128 is returned in xmm0.  */
8544 	  if ((!type || VECTOR_INTEGER_TYPE_P (type)
8545 	       || INTEGRAL_TYPE_P (type)
8546 	       || VECTOR_FLOAT_TYPE_P (type))
8547 	      && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8548 	      && !COMPLEX_MODE_P (mode)
8549 	      && (GET_MODE_SIZE (mode) == 16 || size == 16))
8550 	    return false;
8551 
	  /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes.  */
8553 	  return size != 1 && size != 2 && size != 4 && size != 8;
8554 	}
8555       else
8556 	{
8557 	  int needed_intregs, needed_sseregs;
8558 
8559 	  return examine_argument (mode, type, 1,
8560 				   &needed_intregs, &needed_sseregs);
8561 	}
8562     }
8563   else
8564     {
8565       if (mode == BLKmode)
8566 	return true;
8567 
8568       size = int_size_in_bytes (type);
8569 
8570       if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8571 	return false;
8572 
8573       if (VECTOR_MODE_P (mode) || mode == TImode)
8574 	{
8575 	  /* User-created vectors small enough to fit in EAX.  */
8576 	  if (size < 8)
8577 	    return false;
8578 
	  /* Unless the ABI prescribes otherwise,
	     MMX/3dNow values are returned in MM0 if available.  */
8581 
8582 	  if (size == 8)
8583 	    return TARGET_VECT8_RETURNS || !TARGET_MMX;
8584 
8585 	  /* SSE values are returned in XMM0 if available.  */
8586 	  if (size == 16)
8587 	    return !TARGET_SSE;
8588 
8589 	  /* AVX values are returned in YMM0 if available.  */
8590 	  if (size == 32)
8591 	    return !TARGET_AVX;
8592 
8593 	  /* AVX512F values are returned in ZMM0 if available.  */
8594 	  if (size == 64)
8595 	    return !TARGET_AVX512F;
8596 	}
8597 
8598       if (mode == XFmode)
8599 	return false;
8600 
8601       if (size > 12)
8602 	return true;
8603 
8604       /* OImode shouldn't be used directly.  */
8605       gcc_assert (mode != OImode);
8606 
8607       return false;
8608     }
8609 #endif
8610 }
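
/* Sketch of the 32-bit branch above with default options:

     long double     -- XFmode, in %st(0)		 (false)
     __m128	     -- 16-byte vector, in %xmm0 if SSE	 (false)
     _Complex double -- 16 bytes > 12, in memory	 (true)

   and any BLKmode aggregate is sent to memory up front.  */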
8611 
8612 
8613 /* Create the va_list data type.  */
8614 
/* Returns the calling convention specific va_list data type.
8616    The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */
8617 
8618 static tree
8619 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8620 {
8621   tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8622 
  /* For i386 we use a plain pointer to the argument area.  */
8624   if (!TARGET_64BIT || abi == MS_ABI)
8625     return build_pointer_type (char_type_node);
8626 
8627   record = lang_hooks.types.make_type (RECORD_TYPE);
8628   type_decl = build_decl (BUILTINS_LOCATION,
8629 			  TYPE_DECL, get_identifier ("__va_list_tag"), record);
8630 
8631   f_gpr = build_decl (BUILTINS_LOCATION,
8632 		      FIELD_DECL, get_identifier ("gp_offset"),
8633 		      unsigned_type_node);
8634   f_fpr = build_decl (BUILTINS_LOCATION,
8635 		      FIELD_DECL, get_identifier ("fp_offset"),
8636 		      unsigned_type_node);
8637   f_ovf = build_decl (BUILTINS_LOCATION,
8638 		      FIELD_DECL, get_identifier ("overflow_arg_area"),
8639 		      ptr_type_node);
8640   f_sav = build_decl (BUILTINS_LOCATION,
8641 		      FIELD_DECL, get_identifier ("reg_save_area"),
8642 		      ptr_type_node);
8643 
8644   va_list_gpr_counter_field = f_gpr;
8645   va_list_fpr_counter_field = f_fpr;
8646 
8647   DECL_FIELD_CONTEXT (f_gpr) = record;
8648   DECL_FIELD_CONTEXT (f_fpr) = record;
8649   DECL_FIELD_CONTEXT (f_ovf) = record;
8650   DECL_FIELD_CONTEXT (f_sav) = record;
8651 
8652   TYPE_STUB_DECL (record) = type_decl;
8653   TYPE_NAME (record) = type_decl;
8654   TYPE_FIELDS (record) = f_gpr;
8655   DECL_CHAIN (f_gpr) = f_fpr;
8656   DECL_CHAIN (f_fpr) = f_ovf;
8657   DECL_CHAIN (f_ovf) = f_sav;
8658 
8659   layout_type (record);
8660 
8661   /* The correct type is an array type of one element.  */
8662   return build_array_type (record, build_index_type (size_zero_node));
8663 }
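
/* The record built above matches the SysV x86-64 psABI va_list; in C
   terms the layout is (sketch):

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;	   -- 0..48, next unused GPR save slot
       unsigned int fp_offset;	   -- 48..176, next unused FPR save slot
       void *overflow_arg_area;	   -- next stack-passed argument
       void *reg_save_area;	   -- base of the register save area
     } __builtin_va_list[1];  */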
8664 
8665 /* Setup the builtin va_list data type and for 64-bit the additional
8666    calling convention specific va_list data types.  */
8667 
8668 static tree
8669 ix86_build_builtin_va_list (void)
8670 {
8671   tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8672 
8673   /* Initialize abi specific va_list builtin types.  */
8674   if (TARGET_64BIT)
8675     {
8676       tree t;
8677       if (ix86_abi == MS_ABI)
8678         {
8679           t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8680           if (TREE_CODE (t) != RECORD_TYPE)
8681             t = build_variant_type_copy (t);
8682           sysv_va_list_type_node = t;
8683         }
8684       else
8685         {
8686           t = ret;
8687           if (TREE_CODE (t) != RECORD_TYPE)
8688             t = build_variant_type_copy (t);
8689           sysv_va_list_type_node = t;
8690         }
8691       if (ix86_abi != MS_ABI)
8692         {
8693           t = ix86_build_builtin_va_list_abi (MS_ABI);
8694           if (TREE_CODE (t) != RECORD_TYPE)
8695             t = build_variant_type_copy (t);
8696           ms_va_list_type_node = t;
8697         }
8698       else
8699         {
8700           t = ret;
8701           if (TREE_CODE (t) != RECORD_TYPE)
8702             t = build_variant_type_copy (t);
8703           ms_va_list_type_node = t;
8704         }
8705     }
8706 
8707   return ret;
8708 }
8709 
8710 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */
8711 
8712 static void
8713 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8714 {
8715   rtx save_area, mem;
8716   alias_set_type set;
8717   int i, max;
8718 
8719   /* GPR size of varargs save area.  */
8720   if (cfun->va_list_gpr_size)
8721     ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8722   else
8723     ix86_varargs_gpr_size = 0;
8724 
8725   /* FPR size of varargs save area.  We don't need it if we don't pass
8726      anything in SSE registers.  */
8727   if (TARGET_SSE && cfun->va_list_fpr_size)
8728     ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8729   else
8730     ix86_varargs_fpr_size = 0;
8731 
8732   if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8733     return;
8734 
8735   save_area = frame_pointer_rtx;
8736   set = get_varargs_alias_set ();
8737 
8738   max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8739   if (max > X86_64_REGPARM_MAX)
8740     max = X86_64_REGPARM_MAX;
8741 
8742   for (i = cum->regno; i < max; i++)
8743     {
8744       mem = gen_rtx_MEM (word_mode,
8745 			 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8746       MEM_NOTRAP_P (mem) = 1;
8747       set_mem_alias_set (mem, set);
8748       emit_move_insn (mem,
8749 		      gen_rtx_REG (word_mode,
8750 				   x86_64_int_parameter_registers[i]));
8751     }
8752 
8753   if (ix86_varargs_fpr_size)
8754     {
8755       machine_mode smode;
8756       rtx_code_label *label;
8757       rtx test;
8758 
      /* Now emit code to save SSE registers.  The AX parameter contains
	 the number of SSE parameter registers used to call this function,
	 though all we actually check here is the zero/non-zero status.  */
8762 
8763       label = gen_label_rtx ();
8764       test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8765       emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8766 				      label));
8767 
8768       /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8769 	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
8770 	 be if we could determine the real mode of the data, via a hook
8771 	 into pass_stdarg.  Ignore all that for now.  */
8772       smode = V4SFmode;
8773       if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8774 	crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8775 
8776       max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8777       if (max > X86_64_SSE_REGPARM_MAX)
8778 	max = X86_64_SSE_REGPARM_MAX;
8779 
8780       for (i = cum->sse_regno; i < max; ++i)
8781 	{
8782 	  mem = plus_constant (Pmode, save_area,
8783 			       i * 16 + ix86_varargs_gpr_size);
8784 	  mem = gen_rtx_MEM (smode, mem);
8785 	  MEM_NOTRAP_P (mem) = 1;
8786 	  set_mem_alias_set (mem, set);
8787 	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8788 
8789 	  emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8790 	}
8791 
8792       emit_label (label);
8793     }
8794 }
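
/* Sketch of the register save area filled in above, assuming the SysV
   values X86_64_REGPARM_MAX == 6 and X86_64_SSE_REGPARM_MAX == 8:

     offset   0:  rdi rsi rdx rcx r8 r9   -- 6 x 8  =  48 bytes
     offset  48:  xmm0 ... xmm7		  -- 8 x 16 = 128 bytes

   gp_offset and fp_offset in va_list index into this block, and the
   SSE half is skipped at run time when AL is zero.  */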
8795 
8796 static void
8797 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8798 {
8799   alias_set_type set = get_varargs_alias_set ();
8800   int i;
8801 
  /* Reset to zero, as a sysv va_arg may have been used before.  */
8804   ix86_varargs_gpr_size = 0;
8805   ix86_varargs_fpr_size = 0;
8806 
8807   for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8808     {
8809       rtx reg, mem;
8810 
8811       mem = gen_rtx_MEM (Pmode,
8812 			 plus_constant (Pmode, virtual_incoming_args_rtx,
8813 					i * UNITS_PER_WORD));
8814       MEM_NOTRAP_P (mem) = 1;
8815       set_mem_alias_set (mem, set);
8816 
8817       reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8818       emit_move_insn (mem, reg);
8819     }
8820 }
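
/* Sketch: the MS ABI reserves a 32-byte "home" area just above the
   return address, so the loop above simply spills the still-unused
   parameter registers (out of RCX, RDX, R8, R9) into their reserved
   stack slots; no separate save area is needed.  */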
8821 
8822 static void
8823 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8824 			     tree type, int *, int no_rtl)
8825 {
8826   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8827   CUMULATIVE_ARGS next_cum;
8828   tree fntype;
8829 
  /* This argument doesn't appear to be used anymore, which is good,
     because the old code here didn't suppress rtl generation.  */
8832   gcc_assert (!no_rtl);
8833 
8834   if (!TARGET_64BIT)
8835     return;
8836 
8837   fntype = TREE_TYPE (current_function_decl);
8838 
8839   /* For varargs, we do not want to skip the dummy va_dcl argument.
8840      For stdargs, we do want to skip the last named argument.  */
8841   next_cum = *cum;
8842   if (stdarg_p (fntype))
8843     ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8844 			       true);
8845 
8846   if (cum->call_abi == MS_ABI)
8847     setup_incoming_varargs_ms_64 (&next_cum);
8848   else
8849     setup_incoming_varargs_64 (&next_cum);
8850 }
8851 
8852 static void
8853 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8854 				   enum machine_mode mode,
8855 				   tree type,
8856 				   int *pretend_size ATTRIBUTE_UNUSED,
8857 				   int no_rtl)
8858 {
8859   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8860   CUMULATIVE_ARGS next_cum;
8861   tree fntype;
8862   rtx save_area;
8863   int bnd_reg, i, max;
8864 
8865   gcc_assert (!no_rtl);
8866 
  /* Do nothing if we use a plain pointer to the argument area.  */
8868   if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8869     return;
8870 
8871   fntype = TREE_TYPE (current_function_decl);
8872 
8873   /* For varargs, we do not want to skip the dummy va_dcl argument.
8874      For stdargs, we do want to skip the last named argument.  */
8875   next_cum = *cum;
8876   if (stdarg_p (fntype))
8877     ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8878 			       true);
8879   save_area = frame_pointer_rtx;
8880 
8881   max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8882   if (max > X86_64_REGPARM_MAX)
8883     max = X86_64_REGPARM_MAX;
8884 
8885   bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8886   if (chkp_function_instrumented_p (current_function_decl))
8887     for (i = cum->regno; i < max; i++)
8888       {
8889 	rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8890 	rtx reg = gen_rtx_REG (DImode,
8891 			       x86_64_int_parameter_registers[i]);
8892 	rtx ptr = reg;
8893 	rtx bounds;
8894 
8895 	if (bnd_reg <= LAST_BND_REG)
8896 	  bounds = gen_rtx_REG (BNDmode, bnd_reg);
8897 	else
8898 	  {
8899 	    rtx ldx_addr =
8900 	      plus_constant (Pmode, arg_pointer_rtx,
8901 			     (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8902 	    bounds = gen_reg_rtx (BNDmode);
8903 	    emit_insn (BNDmode == BND64mode
8904 		       ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8905 		       : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8906 	  }
8907 
8908 	emit_insn (BNDmode == BND64mode
8909 		   ? gen_bnd64_stx (addr, ptr, bounds)
8910 		   : gen_bnd32_stx (addr, ptr, bounds));
8911 
8912 	bnd_reg++;
8913       }
8914 }
8915 
8916 
8917 /* Checks if TYPE is of kind va_list char *.  */
8918 
8919 static bool
8920 is_va_list_char_pointer (tree type)
8921 {
8922   tree canonic;
8923 
8924   /* For 32-bit it is always true.  */
8925   if (!TARGET_64BIT)
8926     return true;
8927   canonic = ix86_canonical_va_list_type (type);
8928   return (canonic == ms_va_list_type_node
8929           || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8930 }
8931 
8932 /* Implement va_start.  */
8933 
8934 static void
8935 ix86_va_start (tree valist, rtx nextarg)
8936 {
8937   HOST_WIDE_INT words, n_gpr, n_fpr;
8938   tree f_gpr, f_fpr, f_ovf, f_sav;
8939   tree gpr, fpr, ovf, sav, t;
8940   tree type;
8941   rtx ovf_rtx;
8942 
8943   if (flag_split_stack
8944       && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8945     {
8946       unsigned int scratch_regno;
8947 
8948       /* When we are splitting the stack, we can't refer to the stack
8949 	 arguments using internal_arg_pointer, because they may be on
8950 	 the old stack.  The split stack prologue will arrange to
8951 	 leave a pointer to the old stack arguments in a scratch
8952 	 register, which we here copy to a pseudo-register.  The split
8953 	 stack prologue can't set the pseudo-register directly because
8954 	 it (the prologue) runs before any registers have been saved.  */
8955 
8956       scratch_regno = split_stack_prologue_scratch_regno ();
8957       if (scratch_regno != INVALID_REGNUM)
8958 	{
8959 	  rtx reg;
8960 	  rtx_insn *seq;
8961 
8962 	  reg = gen_reg_rtx (Pmode);
8963 	  cfun->machine->split_stack_varargs_pointer = reg;
8964 
8965 	  start_sequence ();
8966 	  emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8967 	  seq = get_insns ();
8968 	  end_sequence ();
8969 
8970 	  push_topmost_sequence ();
8971 	  emit_insn_after (seq, entry_of_function ());
8972 	  pop_topmost_sequence ();
8973 	}
8974     }
8975 
8976   /* Only the 64-bit target needs something special.  */
8977   if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8978     {
8979       if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8980 	std_expand_builtin_va_start (valist, nextarg);
8981       else
8982 	{
8983 	  rtx va_r, next;
8984 
8985 	  va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8986 	  next = expand_binop (ptr_mode, add_optab,
8987 			       cfun->machine->split_stack_varargs_pointer,
8988 			       crtl->args.arg_offset_rtx,
8989 			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
8990 	  convert_move (va_r, next, 0);
8991 
8992 	  /* Store zero bounds for va_list.  */
8993 	  if (chkp_function_instrumented_p (current_function_decl))
8994 	    chkp_expand_bounds_reset_for_mem (valist,
8995 					      make_tree (TREE_TYPE (valist),
8996 							 next));
8997 
8998 	}
8999       return;
9000     }
9001 
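  /* Editor's sketch of the SysV x86-64 va_list record whose fields are
     picked apart below (field names as in the psABI; this comment is
     not part of the original source):

	typedef struct {
	  unsigned int gp_offset;	/* next GP slot in reg_save_area */
	  unsigned int fp_offset;	/* next SSE slot in reg_save_area */
	  void *overflow_arg_area;	/* next stacked argument */
	  void *reg_save_area;		/* base of the register save area */
	} __builtin_va_list[1];

     gp_offset counts 8-byte slots from reg_save_area; fp_offset starts
     past the six GP slots at 8 * X86_64_REGPARM_MAX = 48 and counts
     16-byte slots, matching the n_gpr * 8 and
     n_fpr * 16 + 8 * X86_64_REGPARM_MAX stores below.  */
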
9002   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9003   f_fpr = DECL_CHAIN (f_gpr);
9004   f_ovf = DECL_CHAIN (f_fpr);
9005   f_sav = DECL_CHAIN (f_ovf);
9006 
9007   valist = build_simple_mem_ref (valist);
9008   TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
9009   /* The following should be folded into the MEM_REF offset.  */
9010   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
9011 		f_gpr, NULL_TREE);
9012   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
9013 		f_fpr, NULL_TREE);
9014   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
9015 		f_ovf, NULL_TREE);
9016   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
9017 		f_sav, NULL_TREE);
9018 
9019   /* Count number of gp and fp argument registers used.  */
9020   words = crtl->args.info.words;
9021   n_gpr = crtl->args.info.regno;
9022   n_fpr = crtl->args.info.sse_regno;
9023 
9024   if (cfun->va_list_gpr_size)
9025     {
9026       type = TREE_TYPE (gpr);
9027       t = build2 (MODIFY_EXPR, type,
9028 		  gpr, build_int_cst (type, n_gpr * 8));
9029       TREE_SIDE_EFFECTS (t) = 1;
9030       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9031     }
9032 
9033   if (TARGET_SSE && cfun->va_list_fpr_size)
9034     {
9035       type = TREE_TYPE (fpr);
9036       t = build2 (MODIFY_EXPR, type, fpr,
9037 		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
9038       TREE_SIDE_EFFECTS (t) = 1;
9039       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9040     }
9041 
9042   /* Find the overflow area.  */
9043   type = TREE_TYPE (ovf);
9044   if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9045     ovf_rtx = crtl->args.internal_arg_pointer;
9046   else
9047     ovf_rtx = cfun->machine->split_stack_varargs_pointer;
9048   t = make_tree (type, ovf_rtx);
9049   if (words != 0)
9050     t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
9051 
9052   /* Store zero bounds for overflow area pointer.  */
9053   if (chkp_function_instrumented_p (current_function_decl))
9054     chkp_expand_bounds_reset_for_mem (ovf, t);
9055 
9056   t = build2 (MODIFY_EXPR, type, ovf, t);
9057   TREE_SIDE_EFFECTS (t) = 1;
9058   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9059 
9060   if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
9061     {
9062       /* Find the register save area.
9063 	 The function prologue saves it right above the stack frame.  */
9064       type = TREE_TYPE (sav);
9065       t = make_tree (type, frame_pointer_rtx);
9066       if (!ix86_varargs_gpr_size)
9067 	t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
9068 
9069       /* Store zero bounds for save area pointer.  */
9070       if (chkp_function_instrumented_p (current_function_decl))
9071 	chkp_expand_bounds_reset_for_mem (sav, t);
9072 
9073       t = build2 (MODIFY_EXPR, type, sav, t);
9074       TREE_SIDE_EFFECTS (t) = 1;
9075       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9076     }
9077 }
9078 
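/* Editor's sketch (not part of the original source): for an argument
   that needs a single GP register, the gimple built below behaves like

	if (ap->gp_offset <= 48 - 8)		// still fits in a register?
	  {
	    addr = ap->reg_save_area + ap->gp_offset;
	    ap->gp_offset += 8;
	  }
	else
	  {
	    addr = ap->overflow_arg_area;	// take it from the stack
	    ap->overflow_arg_area += 8;
	  }
	result = *(TYPE *) addr;

   with analogous fp_offset / 16-byte-slot logic for SSE-class values,
   plus extra alignment of the overflow pointer when required.  */
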
9079 /* Implement va_arg.  */
9080 
9081 static tree
9082 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9083 		      gimple_seq *post_p)
9084 {
9085   static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
9086   tree f_gpr, f_fpr, f_ovf, f_sav;
9087   tree gpr, fpr, ovf, sav, t;
9088   int size, rsize;
9089   tree lab_false, lab_over = NULL_TREE;
9090   tree addr, t2;
9091   rtx container;
9092   int indirect_p = 0;
9093   tree ptrtype;
9094   machine_mode nat_mode;
9095   unsigned int arg_boundary;
9096 
9097   /* Only the 64-bit target needs something special.  */
9098   if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
9099     return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
9100 
9101   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9102   f_fpr = DECL_CHAIN (f_gpr);
9103   f_ovf = DECL_CHAIN (f_fpr);
9104   f_sav = DECL_CHAIN (f_ovf);
9105 
9106   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
9107 		build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
9108   valist = build_va_arg_indirect_ref (valist);
9109   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9110   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9111   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9112 
9113   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9114   if (indirect_p)
9115     type = build_pointer_type (type);
9116   size = int_size_in_bytes (type);
9117   rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
9118 
9119   nat_mode = type_natural_mode (type, NULL, false);
9120   switch (nat_mode)
9121     {
9122     case V8SFmode:
9123     case V8SImode:
9124     case V32QImode:
9125     case V16HImode:
9126     case V4DFmode:
9127     case V4DImode:
9128     case V16SFmode:
9129     case V16SImode:
9130     case V64QImode:
9131     case V32HImode:
9132     case V8DFmode:
9133     case V8DImode:
9134       /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack.  */
9135       if (!TARGET_64BIT_MS_ABI)
9136 	{
9137 	  container = NULL;
9138 	  break;
9139 	}
9140 
9141     default:
9142       container = construct_container (nat_mode, TYPE_MODE (type),
9143 				       type, 0, X86_64_REGPARM_MAX,
9144 				       X86_64_SSE_REGPARM_MAX, intreg,
9145 				       0);
9146       break;
9147     }
9148 
9149   /* Pull the value out of the saved registers.  */
9150 
9151   addr = create_tmp_var (ptr_type_node, "addr");
9152 
9153   if (container)
9154     {
9155       int needed_intregs, needed_sseregs;
9156       bool need_temp;
9157       tree int_addr, sse_addr;
9158 
9159       lab_false = create_artificial_label (UNKNOWN_LOCATION);
9160       lab_over = create_artificial_label (UNKNOWN_LOCATION);
9161 
9162       examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9163 
9164       need_temp = (!REG_P (container)
9165 		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
9166 		       || TYPE_ALIGN (type) > 128));
9167 
9168       /* In case we are passing a structure, verify that it forms a consecutive
9169          block in the register save area.  If not, we need to do moves.  */
9170       if (!need_temp && !REG_P (container))
9171 	{
9172 	  /* Verify that all registers are strictly consecutive.  */
9173 	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9174 	    {
9175 	      int i;
9176 
9177 	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9178 		{
9179 		  rtx slot = XVECEXP (container, 0, i);
9180 		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9181 		      || INTVAL (XEXP (slot, 1)) != i * 16)
9182 		    need_temp = true;
9183 		}
9184 	    }
9185 	  else
9186 	    {
9187 	      int i;
9188 
9189 	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9190 		{
9191 		  rtx slot = XVECEXP (container, 0, i);
9192 		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9193 		      || INTVAL (XEXP (slot, 1)) != i * 8)
9194 		    need_temp = true;
9195 		}
9196 	    }
9197 	}
9198       if (!need_temp)
9199 	{
9200 	  int_addr = addr;
9201 	  sse_addr = addr;
9202 	}
9203       else
9204 	{
9205 	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
9206 	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9207 	}
9208 
9209       /* First ensure that we fit completely in registers.  */
9210       if (needed_intregs)
9211 	{
9212 	  t = build_int_cst (TREE_TYPE (gpr),
9213 			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9214 	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9215 	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9216 	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9217 	  gimplify_and_add (t, pre_p);
9218 	}
9219       if (needed_sseregs)
9220 	{
9221 	  t = build_int_cst (TREE_TYPE (fpr),
9222 			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9223 			     + X86_64_REGPARM_MAX * 8);
9224 	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9225 	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9226 	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9227 	  gimplify_and_add (t, pre_p);
9228 	}
9229 
9230       /* Compute index to start of area used for integer regs.  */
9231       if (needed_intregs)
9232 	{
9233 	  /* int_addr = gpr + sav; */
9234 	  t = fold_build_pointer_plus (sav, gpr);
9235 	  gimplify_assign (int_addr, t, pre_p);
9236 	}
9237       if (needed_sseregs)
9238 	{
9239 	  /* sse_addr = fpr + sav; */
9240 	  t = fold_build_pointer_plus (sav, fpr);
9241 	  gimplify_assign (sse_addr, t, pre_p);
9242 	}
9243       if (need_temp)
9244 	{
9245 	  int i, prev_size = 0;
9246 	  tree temp = create_tmp_var (type, "va_arg_tmp");
9247 
9248 	  /* addr = &temp; */
9249 	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9250 	  gimplify_assign (addr, t, pre_p);
9251 
9252 	  for (i = 0; i < XVECLEN (container, 0); i++)
9253 	    {
9254 	      rtx slot = XVECEXP (container, 0, i);
9255 	      rtx reg = XEXP (slot, 0);
9256 	      machine_mode mode = GET_MODE (reg);
9257 	      tree piece_type;
9258 	      tree addr_type;
9259 	      tree daddr_type;
9260 	      tree src_addr, src;
9261 	      int src_offset;
9262 	      tree dest_addr, dest;
9263 	      int cur_size = GET_MODE_SIZE (mode);
9264 
9265 	      gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9266 	      prev_size = INTVAL (XEXP (slot, 1));
9267 	      if (prev_size + cur_size > size)
9268 		{
9269 		  cur_size = size - prev_size;
9270 		  mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9271 		  if (mode == BLKmode)
9272 		    mode = QImode;
9273 		}
9274 	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
9275 	      if (mode == GET_MODE (reg))
9276 		addr_type = build_pointer_type (piece_type);
9277 	      else
9278 		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9279 							 true);
9280 	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9281 							true);
9282 
9283 	      if (SSE_REGNO_P (REGNO (reg)))
9284 		{
9285 		  src_addr = sse_addr;
9286 		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9287 		}
9288 	      else
9289 		{
9290 		  src_addr = int_addr;
9291 		  src_offset = REGNO (reg) * 8;
9292 		}
9293 	      src_addr = fold_convert (addr_type, src_addr);
9294 	      src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9295 
9296 	      dest_addr = fold_convert (daddr_type, addr);
9297 	      dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9298 	      if (cur_size == GET_MODE_SIZE (mode))
9299 		{
9300 		  src = build_va_arg_indirect_ref (src_addr);
9301 		  dest = build_va_arg_indirect_ref (dest_addr);
9302 
9303 		  gimplify_assign (dest, src, pre_p);
9304 		}
9305 	      else
9306 		{
9307 		  tree copy
9308 		    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9309 				       3, dest_addr, src_addr,
9310 				       size_int (cur_size));
9311 		  gimplify_and_add (copy, pre_p);
9312 		}
9313 	      prev_size += cur_size;
9314 	    }
9315 	}
9316 
9317       if (needed_intregs)
9318 	{
9319 	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9320 		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9321 	  gimplify_assign (gpr, t, pre_p);
9322 	}
9323 
9324       if (needed_sseregs)
9325 	{
9326 	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9327 		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9328 	  gimplify_assign (fpr, t, pre_p);
9329 	}
9330 
9331       gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9332 
9333       gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9334     }
9335 
9336   /* ... otherwise out of the overflow area.  */
9337 
9338   /* When the caller aligns a parameter on the stack, a parameter whose
9339      alignment exceeds MAX_SUPPORTED_STACK_ALIGNMENT is aligned at
9340      MAX_SUPPORTED_STACK_ALIGNMENT.  Match the callee here with the
9341      caller.  */
9342   arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9343   if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9344     arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9345 
9346   /* Care for on-stack alignment if needed.  */
9347   if (arg_boundary <= 64 || size == 0)
9348     t = ovf;
9349   else
9350     {
9351       HOST_WIDE_INT align = arg_boundary / 8;
9352       t = fold_build_pointer_plus_hwi (ovf, align - 1);
9353       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9354 		  build_int_cst (TREE_TYPE (t), -align));
9355     }
9356 
9357   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9358   gimplify_assign (addr, t, pre_p);
9359 
9360   t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9361   gimplify_assign (unshare_expr (ovf), t, pre_p);
9362 
9363   if (container)
9364     gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9365 
9366   ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9367   addr = fold_convert (ptrtype, addr);
9368 
9369   if (indirect_p)
9370     addr = build_va_arg_indirect_ref (addr);
9371   return build_va_arg_indirect_ref (addr);
9372 }
9373 
9374 /* Return true if OPNUM's MEM should be matched
9375    in movabs* patterns.  */
9376 
9377 bool
9378 ix86_check_movabs (rtx insn, int opnum)
9379 {
9380   rtx set, mem;
9381 
9382   set = PATTERN (insn);
9383   if (GET_CODE (set) == PARALLEL)
9384     set = XVECEXP (set, 0, 0);
9385   gcc_assert (GET_CODE (set) == SET);
9386   mem = XEXP (set, opnum);
9387   while (GET_CODE (mem) == SUBREG)
9388     mem = SUBREG_REG (mem);
9389   gcc_assert (MEM_P (mem));
9390   return volatile_ok || !MEM_VOLATILE_P (mem);
9391 }
9392 
9393 /* Initialize the table of extra 80387 mathematical constants.  */
9394 
9395 static void
9396 init_ext_80387_constants (void)
9397 {
9398   static const char * cst[5] =
9399   {
9400     "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
9401     "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
9402     "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
9403     "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
9404     "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
9405   };
9406   int i;
9407 
9408   for (i = 0; i < 5; i++)
9409     {
9410       real_from_string (&ext_80387_constants_table[i], cst[i]);
9411       /* Ensure each constant is rounded to XFmode precision.  */
9412       real_convert (&ext_80387_constants_table[i],
9413 		    XFmode, &ext_80387_constants_table[i]);
9414     }
9415 
9416   ext_80387_constants_init = 1;
9417 }
9418 
9419 /* Return non-zero if the constant is something that
9420    can be loaded with a special instruction.  */
9421 
9422 int
9423 standard_80387_constant_p (rtx x)
9424 {
9425   machine_mode mode = GET_MODE (x);
9426 
9427   REAL_VALUE_TYPE r;
9428 
9429   if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
9430     return -1;
9431 
9432   if (x == CONST0_RTX (mode))
9433     return 1;
9434   if (x == CONST1_RTX (mode))
9435     return 2;
9436 
9437   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9438 
9439   /* For XFmode constants, try to find a special 80387 instruction when
9440      optimizing for size or on those CPUs that benefit from them.  */
9441   if (mode == XFmode
9442       && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9443     {
9444       int i;
9445 
9446       if (! ext_80387_constants_init)
9447 	init_ext_80387_constants ();
9448 
9449       for (i = 0; i < 5; i++)
9450         if (real_identical (&r, &ext_80387_constants_table[i]))
9451 	  return i + 3;
9452     }
9453 
9454   /* A load of the constant -0.0 or -1.0 will be split into an
9455      fldz;fchs or fld1;fchs sequence.  */
9456   if (real_isnegzero (&r))
9457     return 8;
9458   if (real_identical (&r, &dconstm1))
9459     return 9;
9460 
9461   return 0;
9462 }
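
/* Editor's summary of the encoding returned above and decoded by
   standard_80387_constant_opcode and standard_80387_constant_rtx below:

	-1  not an 80387 float constant
	 0  no special instruction	 5  fldl2e  (log2 e)
	 1  fldz    (+0.0)		 6  fldl2t  (log2 10)
	 2  fld1    (+1.0)		 7  fldpi   (pi)
	 3  fldlg2  (log10 2)		 8  -0.0  (fldz; fchs)
	 4  fldln2  (ln 2)		 9  -1.0  (fld1; fchs)  */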
9463 
9464 /* Return the opcode of the special instruction to be used to load
9465    the constant X.  */
9466 
9467 const char *
9468 standard_80387_constant_opcode (rtx x)
9469 {
9470   switch (standard_80387_constant_p (x))
9471     {
9472     case 1:
9473       return "fldz";
9474     case 2:
9475       return "fld1";
9476     case 3:
9477       return "fldlg2";
9478     case 4:
9479       return "fldln2";
9480     case 5:
9481       return "fldl2e";
9482     case 6:
9483       return "fldl2t";
9484     case 7:
9485       return "fldpi";
9486     case 8:
9487     case 9:
9488       return "#";
9489     default:
9490       gcc_unreachable ();
9491     }
9492 }
9493 
9494 /* Return the CONST_DOUBLE representing the 80387 constant that is
9495    loaded by the specified special instruction.  The argument IDX
9496    matches the return value from standard_80387_constant_p.  */
9497 
9498 rtx
9499 standard_80387_constant_rtx (int idx)
9500 {
9501   int i;
9502 
9503   if (! ext_80387_constants_init)
9504     init_ext_80387_constants ();
9505 
9506   switch (idx)
9507     {
9508     case 3:
9509     case 4:
9510     case 5:
9511     case 6:
9512     case 7:
9513       i = idx - 3;
9514       break;
9515 
9516     default:
9517       gcc_unreachable ();
9518     }
9519 
9520   return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9521 				       XFmode);
9522 }
9523 
9524 /* Return 1 if X is all 0s and 2 if X is all 1s
9525    in a supported SSE/AVX vector mode.  */
9526 
9527 int
9528 standard_sse_constant_p (rtx x)
9529 {
9530   machine_mode mode = GET_MODE (x);
9531 
9532   if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
9533     return 1;
9534   if (vector_all_ones_operand (x, mode))
9535     switch (mode)
9536       {
9537       case V16QImode:
9538       case V8HImode:
9539       case V4SImode:
9540       case V2DImode:
9541 	if (TARGET_SSE2)
9542 	  return 2;
9543       case V32QImode:
9544       case V16HImode:
9545       case V8SImode:
9546       case V4DImode:
9547 	if (TARGET_AVX2)
9548 	  return 2;
9549       case V64QImode:
9550       case V32HImode:
9551       case V16SImode:
9552       case V8DImode:
9553 	if (TARGET_AVX512F)
9554 	  return 2;
9555       default:
9556 	break;
9557       }
9558 
9559   return 0;
9560 }
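
/* Editor's examples: standard_sse_constant_p (CONST0_RTX (V4SFmode))
   returns 1 on any target, while an all-ones V4SImode vector yields 2
   only when TARGET_SSE2 holds; 256-bit and 512-bit all-ones vectors
   additionally require TARGET_AVX2 and TARGET_AVX512F respectively
   (note that the cases above fall through).  */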
9561 
9562 /* Return the opcode of the special instruction to be used to load
9563    the constant X.  */
9564 
9565 const char *
9566 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9567 {
9568   switch (standard_sse_constant_p (x))
9569     {
9570     case 1:
9571       switch (get_attr_mode (insn))
9572 	{
9573 	case MODE_XI:
9574 	  return "vpxord\t%g0, %g0, %g0";
9575 	case MODE_V16SF:
9576 	  return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9577 				 : "vpxord\t%g0, %g0, %g0";
9578 	case MODE_V8DF:
9579 	  return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9580 				 : "vpxorq\t%g0, %g0, %g0";
9581 	case MODE_TI:
9582 	  return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9583 				 : "%vpxor\t%0, %d0";
9584 	case MODE_V2DF:
9585 	  return "%vxorpd\t%0, %d0";
9586 	case MODE_V4SF:
9587 	  return "%vxorps\t%0, %d0";
9588 
9589 	case MODE_OI:
9590 	  return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9591 				 : "vpxor\t%x0, %x0, %x0";
9592 	case MODE_V4DF:
9593 	  return "vxorpd\t%x0, %x0, %x0";
9594 	case MODE_V8SF:
9595 	  return "vxorps\t%x0, %x0, %x0";
9596 
9597 	default:
9598 	  break;
9599 	}
9600 
9601     case 2:
9602       if (TARGET_AVX512VL
9603 	  || get_attr_mode (insn) == MODE_XI
9604 	  || get_attr_mode (insn) == MODE_V8DF
9605 	  || get_attr_mode (insn) == MODE_V16SF)
9606 	return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9607       if (TARGET_AVX)
9608 	return "vpcmpeqd\t%0, %0, %0";
9609       else
9610 	return "pcmpeqd\t%0, %0";
9611 
9612     default:
9613       break;
9614     }
9615   gcc_unreachable ();
9616 }
9617 
9618 /* Return true if OP contains a symbol reference.  */
9619 
9620 bool
9621 symbolic_reference_mentioned_p (rtx op)
9622 {
9623   const char *fmt;
9624   int i;
9625 
9626   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9627     return true;
9628 
9629   fmt = GET_RTX_FORMAT (GET_CODE (op));
9630   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9631     {
9632       if (fmt[i] == 'E')
9633 	{
9634 	  int j;
9635 
9636 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9637 	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9638 	      return true;
9639 	}
9640 
9641       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9642 	return true;
9643     }
9644 
9645   return false;
9646 }
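
/* Editor's example: given the operand
	(const:DI (plus:DI (symbol_ref:DI ("x")) (const_int 8)))
   the walk above recurses through the 'e' fields of CONST and PLUS and
   returns true upon reaching the SYMBOL_REF.  */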
9647 
9648 /* Return true if it is appropriate to emit `ret' instructions in the
9649    body of a function.  Do this only if the epilogue is simple, needing a
9650    couple of insns.  Prior to reloading, we can't tell how many registers
9651    must be saved, so return false then.  Return false if there is no frame
9652    marker to de-allocate.  */
9653 
9654 bool
9655 ix86_can_use_return_insn_p (void)
9656 {
9657   struct ix86_frame frame;
9658 
9659   if (! reload_completed || frame_pointer_needed)
9660     return 0;
9661 
9662   /* Don't allow more than 32k pop, since that's all we can do
9663      with one instruction.  */
9664   if (crtl->args.pops_args && crtl->args.size >= 32768)
9665     return 0;
9666 
9667   ix86_compute_frame_layout (&frame);
9668   return (frame.stack_pointer_offset == UNITS_PER_WORD
9669 	  && (frame.nregs + frame.nsseregs) == 0);
9670 }
9671 
9672 /* Value should be nonzero if functions must have frame pointers.
9673    Zero means the frame pointer need not be set up (and parms may
9674    be accessed via the stack pointer) in functions that seem suitable.  */
9675 
9676 static bool
9677 ix86_frame_pointer_required (void)
9678 {
9679   /* If we accessed previous frames, then the generated code expects
9680      to be able to access the saved ebp value in our frame.  */
9681   if (cfun->machine->accesses_prev_frame)
9682     return true;
9683 
9684   /* Several x86 OSes need a frame pointer for other reasons,
9685      usually pertaining to setjmp.  */
9686   if (SUBTARGET_FRAME_POINTER_REQUIRED)
9687     return true;
9688 
9689   /* For older 32-bit runtimes setjmp requires valid frame-pointer.  */
9690   if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9691     return true;
9692 
9693   /* Win64 SEH: very large frames need a frame pointer, as the maximum
9694      stack allocation is 4GB.  */
9695   if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9696     return true;
9697 
9698   /* SSE saves require frame-pointer when stack is misaligned.  */
9699   if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
9700     return true;
9701 
9702   /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9703      turns off the frame pointer by default.  Turn it back on now if
9704      we've not got a leaf function.  */
9705   if (TARGET_OMIT_LEAF_FRAME_POINTER
9706       && (!crtl->is_leaf
9707 	  || ix86_current_function_calls_tls_descriptor))
9708     return true;
9709 
9710   if (crtl->profile && !flag_fentry)
9711     return true;
9712 
9713   return false;
9714 }
9715 
9716 /* Record that the current function accesses previous call frames.  */
9717 
9718 void
9719 ix86_setup_frame_addresses (void)
9720 {
9721   cfun->machine->accesses_prev_frame = 1;
9722 }
9723 
9724 #ifndef USE_HIDDEN_LINKONCE
9725 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9726 #  define USE_HIDDEN_LINKONCE 1
9727 # else
9728 #  define USE_HIDDEN_LINKONCE 0
9729 # endif
9730 #endif
9731 
9732 static int pic_labels_used;
9733 
9734 /* Fills in the label name that should be used for a pc thunk for
9735    the given register.  */
9736 
9737 static void
9738 get_pc_thunk_name (char name[32], unsigned int regno)
9739 {
9740   gcc_assert (!TARGET_64BIT);
9741 
9742   if (USE_HIDDEN_LINKONCE)
9743     sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9744   else
9745     ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9746 }
9747 
9748 
9749 /* This function generates the pc thunks for -fpic that load a register
9750    with the return address of the caller and then return.  */
9751 
9752 static void
9753 ix86_code_end (void)
9754 {
9755   rtx xops[2];
9756   int regno;
9757 
9758   for (regno = AX_REG; regno <= SP_REG; regno++)
9759     {
9760       char name[32];
9761       tree decl;
9762 
9763       if (!(pic_labels_used & (1 << regno)))
9764 	continue;
9765 
9766       get_pc_thunk_name (name, regno);
9767 
9768       decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9769 			 get_identifier (name),
9770 			 build_function_type_list (void_type_node, NULL_TREE));
9771       DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9772 				       NULL_TREE, void_type_node);
9773       TREE_PUBLIC (decl) = 1;
9774       TREE_STATIC (decl) = 1;
9775       DECL_IGNORED_P (decl) = 1;
9776 
9777 #if TARGET_MACHO
9778       if (TARGET_MACHO)
9779 	{
9780 	  switch_to_section (darwin_sections[text_coal_section]);
9781 	  fputs ("\t.weak_definition\t", asm_out_file);
9782 	  assemble_name (asm_out_file, name);
9783 	  fputs ("\n\t.private_extern\t", asm_out_file);
9784 	  assemble_name (asm_out_file, name);
9785 	  putc ('\n', asm_out_file);
9786 	  ASM_OUTPUT_LABEL (asm_out_file, name);
9787 	  DECL_WEAK (decl) = 1;
9788 	}
9789       else
9790 #endif
9791       if (USE_HIDDEN_LINKONCE)
9792 	{
9793 	  cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9794 
9795 	  targetm.asm_out.unique_section (decl, 0);
9796 	  switch_to_section (get_named_section (decl, NULL, 0));
9797 
9798 	  targetm.asm_out.globalize_label (asm_out_file, name);
9799 	  fputs ("\t.hidden\t", asm_out_file);
9800 	  assemble_name (asm_out_file, name);
9801 	  putc ('\n', asm_out_file);
9802 	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9803 	}
9804       else
9805 	{
9806 	  switch_to_section (text_section);
9807 	  ASM_OUTPUT_LABEL (asm_out_file, name);
9808 	}
9809 
9810       DECL_INITIAL (decl) = make_node (BLOCK);
9811       current_function_decl = decl;
9812       init_function_start (decl);
9813       first_function_block_is_cold = false;
9814       /* Make sure unwind info is emitted for the thunk if needed.  */
9815       final_start_function (emit_barrier (), asm_out_file, 1);
9816 
9817       /* Pad stack IP move with 4 instructions (two NOPs count
9818 	 as one instruction).  */
9819       if (TARGET_PAD_SHORT_FUNCTION)
9820 	{
9821 	  int i = 8;
9822 
9823 	  while (i--)
9824 	    fputs ("\tnop\n", asm_out_file);
9825 	}
9826 
9827       xops[0] = gen_rtx_REG (Pmode, regno);
9828       xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9829       output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9830       output_asm_insn ("%!ret", NULL);
9831       final_end_function ();
9832       init_insn_lengths ();
9833       free_after_compilation (cfun);
9834       set_cfun (NULL);
9835       current_function_decl = NULL;
9836     }
9837 
9838   if (flag_split_stack)
9839     file_end_indicate_split_stack ();
9840 }
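
/* Editor's sketch: combined with output_set_got below, the emitted
   32-bit PIC preamble for %ebx looks roughly like

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
	...
     __x86.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret

   i.e. the thunk copies its own return address (the address of the
   addl) into the PIC register.  */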
9841 
9842 /* Emit code for the SET_GOT patterns.  */
9843 
9844 const char *
9845 output_set_got (rtx dest, rtx label)
9846 {
9847   rtx xops[3];
9848 
9849   xops[0] = dest;
9850 
9851   if (TARGET_VXWORKS_RTP && flag_pic)
9852     {
9853       /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
9854       xops[2] = gen_rtx_MEM (Pmode,
9855 			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9856       output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9857 
9858       /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9859 	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9860 	 an unadorned address.  */
9861       xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9862       SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9863       output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9864       return "";
9865     }
9866 
9867   xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9868 
9869   if (!flag_pic)
9870     {
9871       if (TARGET_MACHO)
9872 	/* We don't need a pic base, we're not producing pic.  */
9873 	gcc_unreachable ();
9874 
9875       xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9876       output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9877       targetm.asm_out.internal_label (asm_out_file, "L",
9878 				      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9879     }
9880   else
9881     {
9882       char name[32];
9883       get_pc_thunk_name (name, REGNO (dest));
9884       pic_labels_used |= 1 << REGNO (dest);
9885 
9886       xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9887       xops[2] = gen_rtx_MEM (QImode, xops[2]);
9888       output_asm_insn ("%!call\t%X2", xops);
9889 
9890 #if TARGET_MACHO
9891       /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9892          This is what will be referenced by the Mach-O PIC subsystem.  */
9893       if (machopic_should_output_picbase_label () || !label)
9894 	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9895 
9896       /* When we are restoring the pic base at the site of a nonlocal label,
9897          and we decided to emit the pic base above, we will still output a
9898          local label used for calculating the correction offset (even though
9899          the offset will be 0 in that case).  */
9900       if (label)
9901         targetm.asm_out.internal_label (asm_out_file, "L",
9902 					   CODE_LABEL_NUMBER (label));
9903 #endif
9904     }
9905 
9906   if (!TARGET_MACHO)
9907     output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9908 
9909   return "";
9910 }
9911 
9912 /* Generate a "push" pattern for input ARG.  */
9913 
9914 static rtx
9915 gen_push (rtx arg)
9916 {
9917   struct machine_function *m = cfun->machine;
9918 
9919   if (m->fs.cfa_reg == stack_pointer_rtx)
9920     m->fs.cfa_offset += UNITS_PER_WORD;
9921   m->fs.sp_offset += UNITS_PER_WORD;
9922 
9923   if (REG_P (arg) && GET_MODE (arg) != word_mode)
9924     arg = gen_rtx_REG (word_mode, REGNO (arg));
9925 
9926   return gen_rtx_SET (VOIDmode,
9927 		      gen_rtx_MEM (word_mode,
9928 				   gen_rtx_PRE_DEC (Pmode,
9929 						    stack_pointer_rtx)),
9930 		      arg);
9931 }
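
/* Editor's example: on x86-64, gen_push (gen_rtx_REG (DImode, BX_REG))
   returns the RTL

	(set (mem:DI (pre_dec:DI (reg:DI sp)))
	     (reg:DI bx))

   and advances cfun->machine->fs.sp_offset by UNITS_PER_WORD.  */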
9932 
9933 /* Generate a "pop" pattern for input ARG.  */
9934 
9935 static rtx
9936 gen_pop (rtx arg)
9937 {
9938   if (REG_P (arg) && GET_MODE (arg) != word_mode)
9939     arg = gen_rtx_REG (word_mode, REGNO (arg));
9940 
9941   return gen_rtx_SET (VOIDmode,
9942 		      arg,
9943 		      gen_rtx_MEM (word_mode,
9944 				   gen_rtx_POST_INC (Pmode,
9945 						     stack_pointer_rtx)));
9946 }
9947 
9948 /* Return the number of an unused call-clobbered register if one is
9949    available for the entire function, and INVALID_REGNUM otherwise.  */
9950 
9951 static unsigned int
9952 ix86_select_alt_pic_regnum (void)
9953 {
9954   if (ix86_use_pseudo_pic_reg ())
9955     return INVALID_REGNUM;
9956 
9957   if (crtl->is_leaf
9958       && !crtl->profile
9959       && !ix86_current_function_calls_tls_descriptor)
9960     {
9961       int i, drap;
9962       /* Can't use the same register for both PIC and DRAP.  */
9963       if (crtl->drap_reg)
9964 	drap = REGNO (crtl->drap_reg);
9965       else
9966 	drap = -1;
9967       for (i = 2; i >= 0; --i)
9968         if (i != drap && !df_regs_ever_live_p (i))
9969 	  return i;
9970     }
9971 
9972   return INVALID_REGNUM;
9973 }
9974 
9975 /* Return TRUE if we need to save REGNO.  */
9976 
9977 static bool
9978 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9979 {
9980   if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
9981       && pic_offset_table_rtx)
9982     {
9983       if (ix86_use_pseudo_pic_reg ())
9984 	{
9985 	  /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
9986 	     _mcount in prologue.  */
9987 	  if (!TARGET_64BIT && flag_pic && crtl->profile)
9988 	    return true;
9989 	}
9990       else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9991 	       || crtl->profile
9992 	       || crtl->calls_eh_return
9993 	       || crtl->uses_const_pool
9994 	       || cfun->has_nonlocal_label)
9995         return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9996     }
9997 
9998   if (crtl->calls_eh_return && maybe_eh_return)
9999     {
10000       unsigned i;
10001       for (i = 0; ; i++)
10002 	{
10003 	  unsigned test = EH_RETURN_DATA_REGNO (i);
10004 	  if (test == INVALID_REGNUM)
10005 	    break;
10006 	  if (test == regno)
10007 	    return true;
10008 	}
10009     }
10010 
10011   if (crtl->drap_reg
10012       && regno == REGNO (crtl->drap_reg)
10013       && !cfun->machine->no_drap_save_restore)
10014     return true;
10015 
10016   return (df_regs_ever_live_p (regno)
10017 	  && !call_used_regs[regno]
10018 	  && !fixed_regs[regno]
10019 	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
10020 }
10021 
10022 /* Return the number of saved general purpose registers.  */
10023 
10024 static int
10025 ix86_nsaved_regs (void)
10026 {
10027   int nregs = 0;
10028   int regno;
10029 
10030   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10031     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10032       nregs++;
10033   return nregs;
10034 }
10035 
10036 /* Return the number of saved SSE registers.  */
10037 
10038 static int
10039 ix86_nsaved_sseregs (void)
10040 {
10041   int nregs = 0;
10042   int regno;
10043 
10044   if (!TARGET_64BIT_MS_ABI)
10045     return 0;
10046   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10047     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10048       nregs++;
10049   return nregs;
10050 }
10051 
10052 /* Given FROM and TO register numbers, say whether this elimination is
10053    allowed.  If stack alignment is needed, we can only replace argument
10054    pointer with hard frame pointer, or replace frame pointer with stack
10055    pointer.  Otherwise, frame pointer elimination is automatically
10056    handled and all other eliminations are valid.  */
10057 
10058 static bool
10059 ix86_can_eliminate (const int from, const int to)
10060 {
10061   if (stack_realign_fp)
10062     return ((from == ARG_POINTER_REGNUM
10063 	     && to == HARD_FRAME_POINTER_REGNUM)
10064 	    || (from == FRAME_POINTER_REGNUM
10065 		&& to == STACK_POINTER_REGNUM));
10066   else
10067     return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
10068 }
10069 
10070 /* Return the offset between two registers, one to be eliminated, and the other
10071    its replacement, at the start of a routine.  */
10072 
10073 HOST_WIDE_INT
10074 ix86_initial_elimination_offset (int from, int to)
10075 {
10076   struct ix86_frame frame;
10077   ix86_compute_frame_layout (&frame);
10078 
10079   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10080     return frame.hard_frame_pointer_offset;
10081   else if (from == FRAME_POINTER_REGNUM
10082 	   && to == HARD_FRAME_POINTER_REGNUM)
10083     return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
10084   else
10085     {
10086       gcc_assert (to == STACK_POINTER_REGNUM);
10087 
10088       if (from == ARG_POINTER_REGNUM)
10089 	return frame.stack_pointer_offset;
10090 
10091       gcc_assert (from == FRAME_POINTER_REGNUM);
10092       return frame.stack_pointer_offset - frame.frame_pointer_offset;
10093     }
10094 }
10095 
10096 /* In a dynamically-aligned function, we can't know the offset from
10097    stack pointer to frame pointer, so we must ensure that setjmp
10098    eliminates fp against the hard fp (%ebp) rather than trying to
10099    index from %esp up to the top of the frame across a gap that is
10100    of unknown (at compile-time) size.  */
10101 static rtx
10102 ix86_builtin_setjmp_frame_value (void)
10103 {
10104   return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
10105 }
10106 
10107 /* When using -fsplit-stack, the allocation routines set a field in
10108    the TCB to the bottom of the stack plus this much space, measured
10109    in bytes.  */
10110 
10111 #define SPLIT_STACK_AVAILABLE 256
10112 
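/* Editor's sketch of the layout that the offsets computed below describe,
   from the incoming stack pointer downwards (entries in brackets are
   optional):

	return address
	[pushed static chain]
	[saved frame pointer]		<- hard_frame_pointer_offset
	GP register save area		<- reg_save_offset
	SSE register save area		<- sse_reg_save_offset
	va_arg register save area
	local variables			<- frame_pointer_offset
	outgoing arguments
					<- stack_pointer_offset

   On SEH targets the hard frame pointer is instead placed near the
   bottom of the frame; see the end of the function.  */
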
10113 /* Fill in the ix86_frame structure for the currently compiled function.  */
10114 
10115 static void
10116 ix86_compute_frame_layout (struct ix86_frame *frame)
10117 {
10118   unsigned HOST_WIDE_INT stack_alignment_needed;
10119   HOST_WIDE_INT offset;
10120   unsigned HOST_WIDE_INT preferred_alignment;
10121   HOST_WIDE_INT size = get_frame_size ();
10122   HOST_WIDE_INT to_allocate;
10123 
10124   frame->nregs = ix86_nsaved_regs ();
10125   frame->nsseregs = ix86_nsaved_sseregs ();
10126 
10127   /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
10128      except for function prologues, leaf functions, and when the default
10129      incoming stack boundary is overridden at the command line or via the
10130      force_align_arg_pointer attribute.  */
10131   if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
10132       && (!crtl->is_leaf || cfun->calls_alloca != 0
10133 	  || ix86_current_function_calls_tls_descriptor
10134 	  || ix86_incoming_stack_boundary < 128))
10135     {
10136       crtl->preferred_stack_boundary = 128;
10137       crtl->stack_alignment_needed = 128;
10138     }
10139 
10140   stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
10141   preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
10142 
10143   gcc_assert (!size || stack_alignment_needed);
10144   gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
10145   gcc_assert (preferred_alignment <= stack_alignment_needed);
10146 
10147   /* For SEH we have to limit the amount of code movement into the prologue.
10148      At present we do this via a BLOCKAGE, at which point there's very little
10149      scheduling that can be done, which means that there's very little point
10150      in doing anything except PUSHs.  */
10151   if (TARGET_SEH)
10152     cfun->machine->use_fast_prologue_epilogue = false;
10153 
10154   /* During reload iterations the number of saved registers can change.
10155      Recompute the value as needed.  Do not recompute when the number of
10156      registers did not change, as reload makes multiple calls to this function
10157      and does not expect the decision to change within a single iteration.  */
10158   else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
10159            && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
10160     {
10161       int count = frame->nregs;
10162       struct cgraph_node *node = cgraph_node::get (current_function_decl);
10163 
10164       cfun->machine->use_fast_prologue_epilogue_nregs = count;
10165 
10166       /* The fast prologue uses move instead of push to save registers.  This
10167          is significantly longer, but also executes faster as modern hardware
10168          can execute the moves in parallel, but can't do that for push/pop.
10169 
10170 	 Be careful about choosing which prologue to emit: when the function
10171 	 takes many instructions to execute, we may use the slow version, as
10172 	 well as when the function is known to be outside a hot spot (this is
10173 	 known only with feedback).  Weight the size of the function by the
10174 	 number of registers to save, as it is cheap to use one or two push
10175 	 instructions but very slow to use many of them.  */
10176       if (count)
10177 	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10178       if (node->frequency < NODE_FREQUENCY_NORMAL
10179 	  || (flag_branch_probabilities
10180 	      && node->frequency < NODE_FREQUENCY_HOT))
10181         cfun->machine->use_fast_prologue_epilogue = false;
10182       else
10183         cfun->machine->use_fast_prologue_epilogue
10184 	   = !expensive_function_p (count);
10185     }
10186 
10187   frame->save_regs_using_mov
10188     = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10189        /* If static stack checking is enabled and done with probes,
10190 	  the registers need to be saved before allocating the frame.  */
10191        && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10192 
10193   /* Skip return address.  */
10194   offset = UNITS_PER_WORD;
10195 
10196   /* Skip pushed static chain.  */
10197   if (ix86_static_chain_on_stack)
10198     offset += UNITS_PER_WORD;
10199 
10200   /* Skip saved base pointer.  */
10201   if (frame_pointer_needed)
10202     offset += UNITS_PER_WORD;
10203   frame->hfp_save_offset = offset;
10204 
10205   /* The traditional frame pointer location is at the top of the frame.  */
10206   frame->hard_frame_pointer_offset = offset;
10207 
10208   /* Register save area.  */
10209   offset += frame->nregs * UNITS_PER_WORD;
10210   frame->reg_save_offset = offset;
10211 
10212   /* On SEH target, registers are pushed just before the frame pointer
10213      location.  */
10214   if (TARGET_SEH)
10215     frame->hard_frame_pointer_offset = offset;
10216 
10217   /* Align and set SSE register save area.  */
10218   if (frame->nsseregs)
10219     {
10220       /* The only ABI that has saved SSE registers (Win64) also has a
10221 	 16-byte aligned default stack, and thus we don't need to be
10222 	 within the re-aligned local stack frame to save them.  In case
10223 	 incoming stack boundary is aligned to less than 16 bytes,
10224 	 unaligned move of SSE register will be emitted, so there is
10225 	 no point to round up the SSE register save area outside the
10226 	 re-aligned local stack frame to 16 bytes.  */
10227       if (ix86_incoming_stack_boundary >= 128)
10228 	offset = (offset + 16 - 1) & -16;
10229       offset += frame->nsseregs * 16;
10230     }
10231   frame->sse_reg_save_offset = offset;
10232 
10233   /* The re-aligned stack starts here.  Values before this point are not
10234      directly comparable with values below this point.  In order to make
10235      sure that no value happens to be the same before and after, force
10236      the alignment computation below to add a non-zero value.  */
10237   if (stack_realign_fp)
10238     offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10239 
10240   /* va_arg area.  */
10241   frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10242   offset += frame->va_arg_size;
10243 
10244   /* Align start of frame for local function.  */
10245   if (stack_realign_fp
10246       || offset != frame->sse_reg_save_offset
10247       || size != 0
10248       || !crtl->is_leaf
10249       || cfun->calls_alloca
10250       || ix86_current_function_calls_tls_descriptor)
10251     offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10252 
10253   /* Frame pointer points here.  */
10254   frame->frame_pointer_offset = offset;
10255 
10256   offset += size;
10257 
10258   /* Add outgoing arguments area.  Can be skipped if we eliminated
10259      all the function calls as dead code.
10260      Skipping is, however, impossible when the function calls alloca: the
10261      alloca expander assumes that the last crtl->outgoing_args_size bytes
10262      of the stack frame are unused.  */
10263   if (ACCUMULATE_OUTGOING_ARGS
10264       && (!crtl->is_leaf || cfun->calls_alloca
10265 	  || ix86_current_function_calls_tls_descriptor))
10266     {
10267       offset += crtl->outgoing_args_size;
10268       frame->outgoing_arguments_size = crtl->outgoing_args_size;
10269     }
10270   else
10271     frame->outgoing_arguments_size = 0;
10272 
10273   /* Align stack boundary.  Only needed if we're calling another function
10274      or using alloca.  */
10275   if (!crtl->is_leaf || cfun->calls_alloca
10276       || ix86_current_function_calls_tls_descriptor)
10277     offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10278 
10279   /* We've reached end of stack frame.  */
10280   frame->stack_pointer_offset = offset;
10281 
10282   /* Size prologue needs to allocate.  */
10283   to_allocate = offset - frame->sse_reg_save_offset;
10284 
10285   if ((!to_allocate && frame->nregs <= 1)
10286       || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10287     frame->save_regs_using_mov = false;
10288 
10289   if (ix86_using_red_zone ()
10290       && crtl->sp_is_unchanging
10291       && crtl->is_leaf
10292       && !ix86_current_function_calls_tls_descriptor)
10293     {
10294       frame->red_zone_size = to_allocate;
10295       if (frame->save_regs_using_mov)
10296 	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10297       if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10298 	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10299     }
10300   else
10301     frame->red_zone_size = 0;
10302   frame->stack_pointer_offset -= frame->red_zone_size;
10303 
10304   /* The SEH frame pointer location is near the bottom of the frame.
10305      This is enforced by the fact that the difference between the
10306      stack pointer and the frame pointer is limited to 240 bytes in
10307      the unwind data structure.  */
10308   if (TARGET_SEH)
10309     {
10310       HOST_WIDE_INT diff;
10311 
10312       /* If we can leave the frame pointer where it is, do so.  This also
10313 	 returns the establisher frame for __builtin_frame_address (0).  */
10314       diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10315       if (diff <= SEH_MAX_FRAME_SIZE
10316 	  && (diff > 240 || (diff & 15) != 0)
10317 	  && !crtl->accesses_prior_frames)
10318 	{
10319 	  /* Ideally we'd determine what portion of the local stack frame
10320 	     (within the constraint of the lowest 240) is most heavily used.
10321 	     But without that complication, simply bias the frame pointer
10322 	     by 128 bytes so as to maximize the amount of the local stack
10323 	     frame that is addressable with 8-bit offsets.  */
10324 	  frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
10325 	}
10326     }
10327 }
10328 
10329 /* This is semi-inlined memory_address_length, but simplified
10330    since we know that we're always dealing with reg+offset, and
10331    to avoid having to create and discard all that rtl.  */
10332 
10333 static inline int
10334 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10335 {
10336   int len = 4;
10337 
10338   if (offset == 0)
10339     {
10340       /* EBP and R13 cannot be encoded without an offset.  */
10341       len = (regno == BP_REG || regno == R13_REG);
10342     }
10343   else if (IN_RANGE (offset, -128, 127))
10344     len = 1;
10345 
10346   /* ESP and R12 must be encoded with a SIB byte.  */
10347   if (regno == SP_REG || regno == R12_REG)
10348     len++;
10349 
10350   return len;
10351 }
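
/* Editor's examples: choose_baseaddr_len (BP_REG, 0) is 1, since %ebp
   cannot be encoded without a displacement; choose_baseaddr_len
   (SP_REG, 8) is 2, a disp8 plus the mandatory SIB byte for %esp; and
   any displacement outside [-128, 127] costs the full 4 bytes.  */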
10352 
10353 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10354    The valid base registers are taken from CFUN->MACHINE->FS.  */
10355 
10356 static rtx
10357 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10358 {
10359   const struct machine_function *m = cfun->machine;
10360   rtx base_reg = NULL;
10361   HOST_WIDE_INT base_offset = 0;
10362 
10363   if (m->use_fast_prologue_epilogue)
10364     {
10365       /* Choose the base register most likely to allow the most scheduling
10366          opportunities.  Generally FP is valid throughout the function,
10367          while DRAP must be reloaded within the epilogue.  But choose either
10368          over the SP due to increased encoding size.  */
10369 
10370       if (m->fs.fp_valid)
10371 	{
10372 	  base_reg = hard_frame_pointer_rtx;
10373 	  base_offset = m->fs.fp_offset - cfa_offset;
10374 	}
10375       else if (m->fs.drap_valid)
10376 	{
10377 	  base_reg = crtl->drap_reg;
10378 	  base_offset = 0 - cfa_offset;
10379 	}
10380       else if (m->fs.sp_valid)
10381 	{
10382 	  base_reg = stack_pointer_rtx;
10383 	  base_offset = m->fs.sp_offset - cfa_offset;
10384 	}
10385     }
10386   else
10387     {
10388       HOST_WIDE_INT toffset;
10389       int len = 16, tlen;
10390 
10391       /* Choose the base register with the smallest address encoding.
10392          With a tie, choose FP > DRAP > SP.  */
10393       if (m->fs.sp_valid)
10394 	{
10395 	  base_reg = stack_pointer_rtx;
10396 	  base_offset = m->fs.sp_offset - cfa_offset;
10397           len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10398 	}
10399       if (m->fs.drap_valid)
10400 	{
10401 	  toffset = 0 - cfa_offset;
10402 	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10403 	  if (tlen <= len)
10404 	    {
10405 	      base_reg = crtl->drap_reg;
10406 	      base_offset = toffset;
10407 	      len = tlen;
10408 	    }
10409 	}
10410       if (m->fs.fp_valid)
10411 	{
10412 	  toffset = m->fs.fp_offset - cfa_offset;
10413 	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10414 	  if (tlen <= len)
10415 	    {
10416 	      base_reg = hard_frame_pointer_rtx;
10417 	      base_offset = toffset;
10418 	      len = tlen;
10419 	    }
10420 	}
10421     }
10422   gcc_assert (base_reg != NULL);
10423 
10424   return plus_constant (Pmode, base_reg, base_offset);
10425 }
10426 
10427 /* Emit code to save registers in the prologue.  */
10428 
10429 static void
10430 ix86_emit_save_regs (void)
10431 {
10432   unsigned int regno;
10433   rtx insn;
10434 
10435   for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10436     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10437       {
10438 	insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10439 	RTX_FRAME_RELATED_P (insn) = 1;
10440       }
10441 }
10442 
10443 /* Emit a single register save at CFA - CFA_OFFSET.  */
10444 
10445 static void
10446 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10447 			      HOST_WIDE_INT cfa_offset)
10448 {
10449   struct machine_function *m = cfun->machine;
10450   rtx reg = gen_rtx_REG (mode, regno);
10451   rtx unspec = NULL_RTX;
10452   rtx mem, addr, base, insn;
10453   unsigned int align;
10454 
10455   addr = choose_baseaddr (cfa_offset);
10456   mem = gen_frame_mem (mode, addr);
10457 
10458   /* The location is aligned up to INCOMING_STACK_BOUNDARY.  */
10459   align = MIN (GET_MODE_ALIGNMENT (mode), INCOMING_STACK_BOUNDARY);
10460   set_mem_align (mem, align);
10461 
10462   /* SSE saves are not within the re-aligned local stack frame.
10463      In case INCOMING_STACK_BOUNDARY is misaligned, we have
10464      to emit an unaligned store.  */
10465   if (mode == V4SFmode && align < 128)
10466     unspec = gen_rtx_UNSPEC (mode, gen_rtvec (1, reg), UNSPEC_STOREU);
10467 
10468   insn = emit_insn (gen_rtx_SET (VOIDmode, mem, unspec ? unspec : reg));
10469   RTX_FRAME_RELATED_P (insn) = 1;
10470 
10471   base = addr;
10472   if (GET_CODE (base) == PLUS)
10473     base = XEXP (base, 0);
10474   gcc_checking_assert (REG_P (base));
10475 
10476   /* When saving registers into a re-aligned local stack frame, avoid
10477      any tricky guessing by dwarf2out.  */
10478   if (m->fs.realigned)
10479     {
10480       gcc_checking_assert (stack_realign_drap);
10481 
10482       if (regno == REGNO (crtl->drap_reg))
10483 	{
10484 	  /* A bit of a hack.  We force the DRAP register to be saved in
10485 	     the re-aligned stack frame, which provides us with a copy
10486 	     of the CFA that will last past the prologue.  Install it.  */
10487 	  gcc_checking_assert (cfun->machine->fs.fp_valid);
10488 	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10489 				cfun->machine->fs.fp_offset - cfa_offset);
10490 	  mem = gen_rtx_MEM (mode, addr);
10491 	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10492 	}
10493       else
10494 	{
10495 	  /* The frame pointer is a stable reference within the
10496 	     aligned frame.  Use it.  */
10497 	  gcc_checking_assert (cfun->machine->fs.fp_valid);
10498 	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10499 				cfun->machine->fs.fp_offset - cfa_offset);
10500 	  mem = gen_rtx_MEM (mode, addr);
10501 	  add_reg_note (insn, REG_CFA_EXPRESSION,
10502 			gen_rtx_SET (VOIDmode, mem, reg));
10503 	}
10504     }
10505 
10506   /* The memory may not be relative to the current CFA register,
10507      which means that we may need to generate a new pattern for
10508      use by the unwind info.  */
10509   else if (base != m->fs.cfa_reg)
10510     {
10511       addr = plus_constant (Pmode, m->fs.cfa_reg,
10512 			    m->fs.cfa_offset - cfa_offset);
10513       mem = gen_rtx_MEM (mode, addr);
10514       add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10515     }
10516   else if (unspec)
10517     add_reg_note (insn, REG_CFA_EXPRESSION,
10518 		  gen_rtx_SET (VOIDmode, mem, reg));
10519 }
10520 
10521 /* Emit code to save registers using MOV insns.
10522    First register is stored at CFA - CFA_OFFSET.  */
10523 static void
10524 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10525 {
10526   unsigned int regno;
10527 
10528   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10529     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10530       {
10531         ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10532 	cfa_offset -= UNITS_PER_WORD;
10533       }
10534 }
10535 
10536 /* Emit code to save SSE registers using MOV insns.
10537    First register is stored at CFA - CFA_OFFSET.  */
10538 static void
10539 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10540 {
10541   unsigned int regno;
10542 
10543   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10544     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10545       {
10546 	ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10547 	cfa_offset -= 16;
10548       }
10549 }
10550 
10551 static GTY(()) rtx queued_cfa_restores;
10552 
10553 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
10554    manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
10555    Don't add the note if the previously saved value will be left untouched
10556    within the stack red zone until return, as unwinders can find the same
10557    value in the register and on the stack.  */
10558 
10559 static void
10560 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
10561 {
10562   if (!crtl->shrink_wrapped
10563       && cfa_offset <= cfun->machine->fs.red_zone_offset)
10564     return;
10565 
10566   if (insn)
10567     {
10568       add_reg_note (insn, REG_CFA_RESTORE, reg);
10569       RTX_FRAME_RELATED_P (insn) = 1;
10570     }
10571   else
10572     queued_cfa_restores
10573       = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10574 }
10575 
10576 /* Add queued REG_CFA_RESTORE notes if any to INSN.  */
10577 
10578 static void
10579 ix86_add_queued_cfa_restore_notes (rtx insn)
10580 {
10581   rtx last;
10582   if (!queued_cfa_restores)
10583     return;
10584   for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10585     ;
10586   XEXP (last, 1) = REG_NOTES (insn);
10587   REG_NOTES (insn) = queued_cfa_restores;
10588   queued_cfa_restores = NULL_RTX;
10589   RTX_FRAME_RELATED_P (insn) = 1;
10590 }
10591 
10592 /* Expand prologue or epilogue stack adjustment.
10593    The pattern exists to put a dependency on all ebp-based memory accesses.
10594    STYLE should be negative if instructions should be marked as frame related,
10595    zero if %r11 register is live and cannot be freely used and positive
10596    otherwise.  */
10597 
10598 static void
10599 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10600 			   int style, bool set_cfa)
10601 {
10602   struct machine_function *m = cfun->machine;
10603   rtx insn;
10604   bool add_frame_related_expr = false;
10605 
10606   if (Pmode == SImode)
10607     insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10608   else if (x86_64_immediate_operand (offset, DImode))
10609     insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10610   else
10611     {
10612       rtx tmp;
10613       /* R11 is also used by the indirect sibcall return path: it is set
10614 	 before the epilogue and used after it.  */
10615       if (style)
10616         tmp = gen_rtx_REG (DImode, R11_REG);
10617       else
10618 	{
10619 	  gcc_assert (src != hard_frame_pointer_rtx
10620 		      && dest != hard_frame_pointer_rtx);
10621 	  tmp = hard_frame_pointer_rtx;
10622 	}
10623       insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10624       if (style < 0)
10625 	add_frame_related_expr = true;
10626 
10627       insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10628     }
10629 
10630   insn = emit_insn (insn);
10631   if (style >= 0)
10632     ix86_add_queued_cfa_restore_notes (insn);
10633 
10634   if (set_cfa)
10635     {
10636       rtx r;
10637 
10638       gcc_assert (m->fs.cfa_reg == src);
10639       m->fs.cfa_offset += INTVAL (offset);
10640       m->fs.cfa_reg = dest;
10641 
10642       r = gen_rtx_PLUS (Pmode, src, offset);
10643       r = gen_rtx_SET (VOIDmode, dest, r);
10644       add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10645       RTX_FRAME_RELATED_P (insn) = 1;
10646     }
10647   else if (style < 0)
10648     {
10649       RTX_FRAME_RELATED_P (insn) = 1;
10650       if (add_frame_related_expr)
10651 	{
10652 	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
10653 	  r = gen_rtx_SET (VOIDmode, dest, r);
10654 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10655 	}
10656     }
10657 
10658   if (dest == stack_pointer_rtx)
10659     {
10660       HOST_WIDE_INT ooffset = m->fs.sp_offset;
10661       bool valid = m->fs.sp_valid;
10662 
10663       if (src == hard_frame_pointer_rtx)
10664 	{
10665 	  valid = m->fs.fp_valid;
10666 	  ooffset = m->fs.fp_offset;
10667 	}
10668       else if (src == crtl->drap_reg)
10669 	{
10670 	  valid = m->fs.drap_valid;
10671 	  ooffset = 0;
10672 	}
10673       else
10674 	{
10675 	  /* Else there are two possibilities: SP itself, which we set
10676 	     up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
10677 	     taken care of by hand along the eh_return path.  */
10678 	  gcc_checking_assert (src == stack_pointer_rtx
10679 			       || offset == const0_rtx);
10680 	}
10681 
10682       m->fs.sp_offset = ooffset - INTVAL (offset);
10683       m->fs.sp_valid = valid;
10684     }
10685 }
10686 
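/* A hedged example of what pro_epilogue_adjust_stack emits (assuming
   64-bit code, DEST == SRC == %rsp, OFFSET == -136, STYLE == -1):

	subq	$136, %rsp

   The insn is marked frame-related, m->fs.sp_offset grows by 136, and
   when SET_CFA is true it also carries a REG_CFA_ADJUST_CFA note of
   the form (set (reg sp) (plus (reg sp) (const_int -136))).  */
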
10687 /* Find an available register to be used as the dynamic realign
10688    argument pointer register.  Such a register is written in the
10689    prologue and used at the beginning of the body, so it must not be
10690 	1. a parameter passing register.
10691 	2. the GOT pointer.
10692    We reuse the static-chain register if it is available.  Otherwise,
10693    we use DI for i386 and R13 for x86-64.  We chose R13 since it has
10694    shorter encoding.
10695 
10696    Return: the regno of the chosen register.  */
10697 
10698 static unsigned int
10699 find_drap_reg (void)
10700 {
10701   tree decl = cfun->decl;
10702 
10703   if (TARGET_64BIT)
10704     {
10705       /* Use R13 for a nested function or a function that needs a static
10706 	 chain.  Since a function with a tail call may use any
10707 	 caller-saved register in its epilogue, DRAP must not use a
10708 	 caller-saved register in that case.  */
10709       if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10710 	return R13_REG;
10711 
10712       return R10_REG;
10713     }
10714   else
10715     {
10716       /* Use DI for a nested function or a function that needs a static
10717 	 chain.  Since a function with a tail call may use any
10718 	 caller-saved register in its epilogue, DRAP must not use a
10719 	 caller-saved register in that case.  */
10720       if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10721 	return DI_REG;
10722 
10723       /* Reuse static chain register if it isn't used for parameter
10724          passing.  */
10725       if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10726 	{
10727 	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10728 	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10729 	    return CX_REG;
10730 	}
10731       return DI_REG;
10732     }
10733 }
10734 
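/* Concrete picks implied by the logic above: a plain 64-bit function
   gets %r10, while a 64-bit nested function (or one that emits tail
   calls) gets %r13; in 32-bit mode a cdecl function with at most two
   register parameters reuses %ecx, and the fastcall/thiscall and
   static-chain cases fall back to %edi.  */
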
10735 /* Handle a "force_align_arg_pointer" attribute.  */
10736 
10737 static tree
10738 ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name,
10739 					       tree, int, bool *no_add_attrs)
10740 {
10741   if (TREE_CODE (*node) != FUNCTION_TYPE
10742       && TREE_CODE (*node) != METHOD_TYPE
10743       && TREE_CODE (*node) != FIELD_DECL
10744       && TREE_CODE (*node) != TYPE_DECL)
10745     {
10746       warning (OPT_Wattributes, "%qE attribute only applies to functions",
10747 	       name);
10748       *no_add_attrs = true;
10749     }
10750 
10751   return NULL_TREE;
10752 }
10753 
10754 /* Return minimum incoming stack alignment.  */
10755 
10756 static unsigned int
10757 ix86_minimum_incoming_stack_boundary (bool sibcall)
10758 {
10759   unsigned int incoming_stack_boundary;
10760 
10761   /* Prefer the one specified on the command line.  */
10762   if (ix86_user_incoming_stack_boundary)
10763     incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10764   /* In 32-bit mode, use MIN_STACK_BOUNDARY as the incoming stack
10765      boundary if -mstackrealign is used, this isn't a sibcall check,
10766      and the estimated stack alignment is 128 bits.  */
10767   else if (!sibcall
10768 	   && ix86_force_align_arg_pointer
10769 	   && crtl->stack_alignment_estimated == 128)
10770     incoming_stack_boundary = MIN_STACK_BOUNDARY;
10771   else
10772     incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10773 
10774   /* Incoming stack alignment can be changed on individual functions
10775      via force_align_arg_pointer attribute.  We use the smallest
10776      incoming stack boundary.  */
10777   if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10778       && lookup_attribute (ix86_force_align_arg_pointer_string,
10779 			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10780     incoming_stack_boundary = MIN_STACK_BOUNDARY;
10781 
10782   /* The incoming stack frame has to be aligned at least at
10783      parm_stack_boundary.  */
10784   if (incoming_stack_boundary < crtl->parm_stack_boundary)
10785     incoming_stack_boundary = crtl->parm_stack_boundary;
10786 
10787   /* The stack at the entry of main is aligned by the runtime.  We use
10788      the smallest incoming stack boundary.  */
10789   if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10790       && DECL_NAME (current_function_decl)
10791       && MAIN_NAME_P (DECL_NAME (current_function_decl))
10792       && DECL_FILE_SCOPE_P (current_function_decl))
10793     incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10794 
10795   return incoming_stack_boundary;
10796 }
10797 
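/* A worked example (assuming the usual i386.h values, where
   MIN_STACK_BOUNDARY is the word size in bits): a 64-bit function
   carrying __attribute__((force_align_arg_pointer)) is assumed to be
   entered with only 64-bit stack alignment instead of the default
   128 bits, unless crtl->parm_stack_boundary then raises the result
   back up as described above.  */
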
10798 /* Update incoming stack boundary and estimated stack alignment.  */
10799 
10800 static void
10801 ix86_update_stack_boundary (void)
10802 {
10803   ix86_incoming_stack_boundary
10804     = ix86_minimum_incoming_stack_boundary (false);
10805 
10806   /* x86_64 varargs need 16-byte stack alignment for the register save
10807      area.  */
10808   if (TARGET_64BIT
10809       && cfun->stdarg
10810       && crtl->stack_alignment_estimated < 128)
10811     crtl->stack_alignment_estimated = 128;
10812 
10813   /* __tls_get_addr needs to be called with 16-byte aligned stack.  */
10814   if (ix86_tls_descriptor_calls_expanded_in_cfun
10815       && crtl->preferred_stack_boundary < 128)
10816     crtl->preferred_stack_boundary = 128;
10817 }
10818 
10819 /* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
10820    needed or an rtx for DRAP otherwise.  */
10821 
10822 static rtx
10823 ix86_get_drap_rtx (void)
10824 {
10825   if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10826     crtl->need_drap = true;
10827 
10828   if (stack_realign_drap)
10829     {
10830       /* Assign DRAP to vDRAP and return vDRAP.  */
10831       unsigned int regno = find_drap_reg ();
10832       rtx drap_vreg;
10833       rtx arg_ptr;
10834       rtx_insn *seq, *insn;
10835 
10836       arg_ptr = gen_rtx_REG (Pmode, regno);
10837       crtl->drap_reg = arg_ptr;
10838 
10839       start_sequence ();
10840       drap_vreg = copy_to_reg (arg_ptr);
10841       seq = get_insns ();
10842       end_sequence ();
10843 
10844       insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10845       if (!optimize)
10846 	{
10847 	  add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10848 	  RTX_FRAME_RELATED_P (insn) = 1;
10849 	}
10850       return drap_vreg;
10851     }
10852   else
10853     return NULL;
10854 }
10855 
10856 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
10857 
10858 static rtx
10859 ix86_internal_arg_pointer (void)
10860 {
10861   return virtual_incoming_args_rtx;
10862 }
10863 
10864 struct scratch_reg {
10865   rtx reg;
10866   bool saved;
10867 };
10868 
10869 /* Return a short-lived scratch register for use on function entry.
10870    In 32-bit mode, it is valid only after the registers are saved
10871    in the prologue.  This register must be released by means of
10872    release_scratch_register_on_entry once it is dead.  */
10873 
10874 static void
10875 get_scratch_register_on_entry (struct scratch_reg *sr)
10876 {
10877   int regno;
10878 
10879   sr->saved = false;
10880 
10881   if (TARGET_64BIT)
10882     {
10883       /* We always use R11 in 64-bit mode.  */
10884       regno = R11_REG;
10885     }
10886   else
10887     {
10888       tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10889       bool fastcall_p
10890 	= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10891       bool thiscall_p
10892 	= lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10893       bool static_chain_p = DECL_STATIC_CHAIN (decl);
10894       int regparm = ix86_function_regparm (fntype, decl);
10895       int drap_regno
10896 	= crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10897 
10898       /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10899 	  for the static chain register.  */
10900       if ((regparm < 1 || (fastcall_p && !static_chain_p))
10901 	  && drap_regno != AX_REG)
10902 	regno = AX_REG;
10903       /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10904 	  for the static chain register.  */
10905       else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10906         regno = AX_REG;
10907       else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10908 	regno = DX_REG;
10909       /* ecx is the static chain register.  */
10910       else if (regparm < 3 && !fastcall_p && !thiscall_p
10911 	       && !static_chain_p
10912 	       && drap_regno != CX_REG)
10913 	regno = CX_REG;
10914       else if (ix86_save_reg (BX_REG, true))
10915 	regno = BX_REG;
10916       /* esi is the static chain register.  */
10917       else if (!(regparm == 3 && static_chain_p)
10918 	       && ix86_save_reg (SI_REG, true))
10919 	regno = SI_REG;
10920       else if (ix86_save_reg (DI_REG, true))
10921 	regno = DI_REG;
10922       else
10923 	{
10924 	  regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10925 	  sr->saved = true;
10926 	}
10927     }
10928 
10929   sr->reg = gen_rtx_REG (Pmode, regno);
10930   if (sr->saved)
10931     {
10932       rtx insn = emit_insn (gen_push (sr->reg));
10933       RTX_FRAME_RELATED_P (insn) = 1;
10934     }
10935 }
10936 
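/* Examples of the selection above: 64-bit code always gets %r11; a
   32-bit cdecl function with no register parameters (and no DRAP in
   %eax) gets %eax for free; and if every candidate is busy, the code
   falls back to %eax (or %edx when DRAP took %eax) with SR->SAVED
   set, paying for the scratch with a push/pop pair.  */
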
10937 /* Release a scratch register obtained from the preceding function.  */
10938 
10939 static void
10940 release_scratch_register_on_entry (struct scratch_reg *sr)
10941 {
10942   if (sr->saved)
10943     {
10944       struct machine_function *m = cfun->machine;
10945       rtx x, insn = emit_insn (gen_pop (sr->reg));
10946 
10947       /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
10948       RTX_FRAME_RELATED_P (insn) = 1;
10949       x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10950       x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10951       add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10952       m->fs.sp_offset -= UNITS_PER_WORD;
10953     }
10954 }
10955 
10956 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10957 
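/* With the default STACK_CHECK_PROBE_INTERVAL_EXP of 12 (a tunable,
   so an assumption in the examples below), PROBE_INTERVAL is
   1 << 12 == 4096 bytes, i.e. one probe per page.  */
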
10958 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.  */
10959 
10960 static void
10961 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10962 {
10963   /* We skip the probe for the first interval + a small dope of 4 words and
10964      probe that many bytes past the specified size to maintain a protection
10965      area at the bottom of the stack.  */
10966   const int dope = 4 * UNITS_PER_WORD;
10967   rtx size_rtx = GEN_INT (size), last;
10968 
10969   /* See if we have a constant small number of probes to generate.  If so,
10970      that's the easy case.  The run-time loop is made up of 11 insns in the
10971      generic case while the compile-time loop is made up of 3+2*(n-1) insns
10972      for n # of intervals.  */
10973   if (size <= 5 * PROBE_INTERVAL)
10974     {
10975       HOST_WIDE_INT i, adjust;
10976       bool first_probe = true;
10977 
10978       /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10979 	 values of N from 1 until it exceeds SIZE.  If only one probe is
10980 	 needed, this will not generate any code.  Then adjust and probe
10981 	 to PROBE_INTERVAL + SIZE.  */
10982       for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10983 	{
10984 	  if (first_probe)
10985 	    {
10986 	      adjust = 2 * PROBE_INTERVAL + dope;
10987 	      first_probe = false;
10988 	    }
10989 	  else
10990 	    adjust = PROBE_INTERVAL;
10991 
10992 	  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10993 				  plus_constant (Pmode, stack_pointer_rtx,
10994 						 -adjust)));
10995 	  emit_stack_probe (stack_pointer_rtx);
10996 	}
10997 
10998       if (first_probe)
10999 	adjust = size + PROBE_INTERVAL + dope;
11000       else
11001         adjust = size + PROBE_INTERVAL - i;
11002 
11003       emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11004 			      plus_constant (Pmode, stack_pointer_rtx,
11005 					     -adjust)));
11006       emit_stack_probe (stack_pointer_rtx);
11007 
11008       /* Adjust back to account for the additional first interval.  */
11009       last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11010 				     plus_constant (Pmode, stack_pointer_rtx,
11011 						    PROBE_INTERVAL + dope)));
11012     }
11013 
11014   /* Otherwise, do the same as above, but in a loop.  Note that we must be
11015      extra careful with variables wrapping around because we might be at
11016      the very top (or the very bottom) of the address space and we have
11017      to be able to handle this case properly; in particular, we use an
11018      equality test for the loop condition.  */
11019   else
11020     {
11021       HOST_WIDE_INT rounded_size;
11022       struct scratch_reg sr;
11023 
11024       get_scratch_register_on_entry (&sr);
11025 
11026 
11027       /* Step 1: round SIZE to the previous multiple of the interval.  */
11028 
11029       rounded_size = size & -PROBE_INTERVAL;
11030 
11031 
11032       /* Step 2: compute initial and final value of the loop counter.  */
11033 
11034       /* SP = SP_0 + PROBE_INTERVAL.  */
11035       emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11036 			      plus_constant (Pmode, stack_pointer_rtx,
11037 					     - (PROBE_INTERVAL + dope))));
11038 
11039       /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE.  */
11040       emit_move_insn (sr.reg, GEN_INT (-rounded_size));
11041       emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
11042 			      gen_rtx_PLUS (Pmode, sr.reg,
11043 					    stack_pointer_rtx)));
11044 
11045 
11046       /* Step 3: the loop
11047 
11048 	 while (SP != LAST_ADDR)
11049 	   {
11050 	     SP = SP + PROBE_INTERVAL
11051 	     probe at SP
11052 	   }
11053 
11054 	 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
11055 	 values of N from 1 until it is equal to ROUNDED_SIZE.  */
11056 
11057       emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
11058 
11059 
11060       /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
11061 	 assert at compile-time that SIZE is equal to ROUNDED_SIZE.  */
11062 
11063       if (size != rounded_size)
11064 	{
11065 	  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11066 			          plus_constant (Pmode, stack_pointer_rtx,
11067 						 rounded_size - size)));
11068 	  emit_stack_probe (stack_pointer_rtx);
11069 	}
11070 
11071       /* Adjust back to account for the additional first interval.  */
11072       last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11073 				     plus_constant (Pmode, stack_pointer_rtx,
11074 						    PROBE_INTERVAL + dope)));
11075 
11076       release_scratch_register_on_entry (&sr);
11077     }
11078 
11079   /* Even if the stack pointer isn't the CFA register, we need to correctly
11080      describe the adjustments made to it, in particular differentiate the
11081      frame-related ones from the frame-unrelated ones.  */
11082   if (size > 0)
11083     {
11084       rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
11085       XVECEXP (expr, 0, 0)
11086 	= gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11087 		       plus_constant (Pmode, stack_pointer_rtx, -size));
11088       XVECEXP (expr, 0, 1)
11089 	= gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11090 		       plus_constant (Pmode, stack_pointer_rtx,
11091 				      PROBE_INTERVAL + dope + size));
11092       add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
11093       RTX_FRAME_RELATED_P (last) = 1;
11094 
11095       cfun->machine->fs.sp_offset += size;
11096     }
11097 
11098   /* Make sure nothing is scheduled before we are done.  */
11099   emit_insn (gen_blockage ());
11100 }
11101 
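/* A hedged sketch of the straight-line case above, assuming 64-bit
   code, PROBE_INTERVAL == 4096, dope == 32 and SIZE == 12288:

	subq	$8224, %rsp	# 2*4096 + 32: skipped interval + dope
	orq	$0, (%rsp)
	subq	$4096, %rsp
	orq	$0, (%rsp)
	subq	$4096, %rsp
	orq	$0, (%rsp)
	addq	$4128, %rsp	# give back PROBE_INTERVAL + dope

   for a net adjustment of 12288 bytes with every page touched.  */
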
11102 /* Adjust the stack pointer up to REG while probing it.  */
11103 
11104 const char *
11105 output_adjust_stack_and_probe (rtx reg)
11106 {
11107   static int labelno = 0;
11108   char loop_lab[32], end_lab[32];
11109   rtx xops[2];
11110 
11111   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11112   ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11113 
11114   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11115 
11116   /* Jump to END_LAB if SP == LAST_ADDR.  */
11117   xops[0] = stack_pointer_rtx;
11118   xops[1] = reg;
11119   output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11120   fputs ("\tje\t", asm_out_file);
11121   assemble_name_raw (asm_out_file, end_lab);
11122   fputc ('\n', asm_out_file);
11123 
11124   /* SP = SP + PROBE_INTERVAL.  */
11125   xops[1] = GEN_INT (PROBE_INTERVAL);
11126   output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11127 
11128   /* Probe at SP.  */
11129   xops[1] = const0_rtx;
11130   output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
11131 
11132   fprintf (asm_out_file, "\tjmp\t");
11133   assemble_name_raw (asm_out_file, loop_lab);
11134   fputc ('\n', asm_out_file);
11135 
11136   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11137 
11138   return "";
11139 }
11140 
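/* Assembled, the loop printed above looks roughly like this in AT&T
   syntax (assuming 64-bit code, %r11 holding LAST_ADDR and a
   4096-byte PROBE_INTERVAL):

	.LPSRL0:
		cmpq	%r11, %rsp
		je	.LPSRE0
		subq	$4096, %rsp
		orq	$0, (%rsp)
		jmp	.LPSRL0
	.LPSRE0:

   The exact label names and operand suffixes are target-dependent.  */
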
11141 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
11142    inclusive.  These are offsets from the current stack pointer.  */
11143 
11144 static void
11145 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
11146 {
11147   /* See if we have a constant small number of probes to generate.  If so,
11148      that's the easy case.  The run-time loop is made up of 7 insns in the
11149      generic case while the compile-time loop is made up of n insns for n #
11150      of intervals.  */
11151   if (size <= 7 * PROBE_INTERVAL)
11152     {
11153       HOST_WIDE_INT i;
11154 
11155       /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
11156 	 it exceeds SIZE.  If only one probe is needed, this will not
11157 	 generate any code.  Then probe at FIRST + SIZE.  */
11158       for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
11159 	emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11160 					 -(first + i)));
11161 
11162       emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11163 				       -(first + size)));
11164     }
11165 
11166   /* Otherwise, do the same as above, but in a loop.  Note that we must be
11167      extra careful with variables wrapping around because we might be at
11168      the very top (or the very bottom) of the address space and we have
11169      to be able to handle this case properly; in particular, we use an
11170      equality test for the loop condition.  */
11171   else
11172     {
11173       HOST_WIDE_INT rounded_size, last;
11174       struct scratch_reg sr;
11175 
11176       get_scratch_register_on_entry (&sr);
11177 
11178 
11179       /* Step 1: round SIZE to the previous multiple of the interval.  */
11180 
11181       rounded_size = size & -PROBE_INTERVAL;
11182 
11183 
11184       /* Step 2: compute initial and final value of the loop counter.  */
11185 
11186       /* TEST_OFFSET = FIRST.  */
11187       emit_move_insn (sr.reg, GEN_INT (-first));
11188 
11189       /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
11190       last = first + rounded_size;
11191 
11192 
11193       /* Step 3: the loop
11194 
11195 	 while (TEST_ADDR != LAST_ADDR)
11196 	   {
11197 	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
11198 	     probe at TEST_ADDR
11199 	   }
11200 
11201          probes at FIRST + N * PROBE_INTERVAL for values of N from 1
11202          until it is equal to ROUNDED_SIZE.  */
11203 
11204       emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
11205 
11206 
11207       /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
11208 	 that SIZE is equal to ROUNDED_SIZE.  */
11209 
11210       if (size != rounded_size)
11211 	emit_stack_probe (plus_constant (Pmode,
11212 					 gen_rtx_PLUS (Pmode,
11213 						       stack_pointer_rtx,
11214 						       sr.reg),
11215 					 rounded_size - size));
11216 
11217       release_scratch_register_on_entry (&sr);
11218     }
11219 
11220   /* Make sure nothing is scheduled before we are done.  */
11221   emit_insn (gen_blockage ());
11222 }
11223 
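/* Unlike ix86_adjust_stack_and_probe above, this routine leaves the
   stack pointer where it is and probes below it.  E.g. (an assumed
   32-bit case) FIRST == 8192 and SIZE == 8192 would emit probes
   equivalent to

	orl	$0, -12288(%esp)
	orl	$0, -16384(%esp)

   touching FIRST + PROBE_INTERVAL and FIRST + SIZE below the stack
   pointer.  */
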
11224 /* Probe a range of stack addresses from REG to END, inclusive.  These are
11225    offsets from the current stack pointer.  */
11226 
11227 const char *
11228 output_probe_stack_range (rtx reg, rtx end)
11229 {
11230   static int labelno = 0;
11231   char loop_lab[32], end_lab[32];
11232   rtx xops[3];
11233 
11234   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11235   ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11236 
11237   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11238 
11239   /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
11240   xops[0] = reg;
11241   xops[1] = end;
11242   output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11243   fputs ("\tje\t", asm_out_file);
11244   assemble_name_raw (asm_out_file, end_lab);
11245   fputc ('\n', asm_out_file);
11246 
11247   /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
11248   xops[1] = GEN_INT (PROBE_INTERVAL);
11249   output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11250 
11251   /* Probe at TEST_ADDR.  */
11252   xops[0] = stack_pointer_rtx;
11253   xops[1] = reg;
11254   xops[2] = const0_rtx;
11255   output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11256 
11257   fprintf (asm_out_file, "\tjmp\t");
11258   assemble_name_raw (asm_out_file, loop_lab);
11259   fputc ('\n', asm_out_file);
11260 
11261   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11262 
11263   return "";
11264 }
11265 
11266 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
11267    to be generated in the correct form.  */
11268 static void
11269 ix86_finalize_stack_realign_flags (void)
11270 {
11271   /* Check if stack realignment is really needed after reload, and
11272      store the result in cfun.  */
11273   unsigned int incoming_stack_boundary
11274     = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11275        ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11276   unsigned int stack_realign
11277     = (incoming_stack_boundary
11278        < (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
11279 	  ? crtl->max_used_stack_slot_alignment
11280 	  : crtl->stack_alignment_needed));
11281 
11282   if (crtl->stack_realign_finalized)
11283     {
11284       /* After stack_realign_needed is finalized, we can no longer
11285 	 change it.  */
11286       gcc_assert (crtl->stack_realign_needed == stack_realign);
11287       return;
11288     }
11289 
11290   /* If the only reason for frame_pointer_needed is that we conservatively
11291      assumed stack realignment might be needed, but in the end nothing that
11292      needed the stack alignment had been spilled, clear frame_pointer_needed
11293      and say we don't need stack realignment.  */
11294   if (stack_realign
11295       && frame_pointer_needed
11296       && crtl->is_leaf
11297       && flag_omit_frame_pointer
11298       && crtl->sp_is_unchanging
11299       && !ix86_current_function_calls_tls_descriptor
11300       && !crtl->accesses_prior_frames
11301       && !cfun->calls_alloca
11302       && !crtl->calls_eh_return
11303       /* See ira_setup_eliminable_regset for the rationale.  */
11304       && !(STACK_CHECK_MOVING_SP
11305 	   && flag_stack_check
11306 	   && flag_exceptions
11307 	   && cfun->can_throw_non_call_exceptions)
11308       && !ix86_frame_pointer_required ()
11309       && get_frame_size () == 0
11310       && ix86_nsaved_sseregs () == 0
11311       && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11312     {
11313       HARD_REG_SET set_up_by_prologue, prologue_used;
11314       basic_block bb;
11315 
11316       CLEAR_HARD_REG_SET (prologue_used);
11317       CLEAR_HARD_REG_SET (set_up_by_prologue);
11318       add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11319       add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11320       add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11321 			   HARD_FRAME_POINTER_REGNUM);
11322       FOR_EACH_BB_FN (bb, cfun)
11323         {
11324           rtx_insn *insn;
11325 	  FOR_BB_INSNS (bb, insn)
11326 	    if (NONDEBUG_INSN_P (insn)
11327 		&& requires_stack_frame_p (insn, prologue_used,
11328 					   set_up_by_prologue))
11329 	      {
11330 		crtl->stack_realign_needed = stack_realign;
11331 		crtl->stack_realign_finalized = true;
11332 		return;
11333 	      }
11334 	}
11335 
11336       /* If drap has been set, but it actually isn't live at the start
11337 	 of the function, there is no reason to set it up.  */
11338       if (crtl->drap_reg)
11339 	{
11340 	  basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11341 	  if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11342 	    {
11343 	      crtl->drap_reg = NULL_RTX;
11344 	      crtl->need_drap = false;
11345 	    }
11346 	}
11347       else
11348 	cfun->machine->no_drap_save_restore = true;
11349 
11350       frame_pointer_needed = false;
11351       stack_realign = false;
11352       crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11353       crtl->stack_alignment_needed = incoming_stack_boundary;
11354       crtl->stack_alignment_estimated = incoming_stack_boundary;
11355       if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11356 	crtl->preferred_stack_boundary = incoming_stack_boundary;
11357       df_finish_pass (true);
11358       df_scan_alloc (NULL);
11359       df_scan_blocks ();
11360       df_compute_regs_ever_live (true);
11361       df_analyze ();
11362     }
11363 
11364   crtl->stack_realign_needed = stack_realign;
11365   crtl->stack_realign_finalized = true;
11366 }
11367 
11368 /* Delete SET_GOT right after the entry block if it is allocated to REG.  */
11369 
11370 static void
11371 ix86_elim_entry_set_got (rtx reg)
11372 {
11373   basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11374   rtx_insn *c_insn = BB_HEAD (bb);
11375   if (!NONDEBUG_INSN_P (c_insn))
11376     c_insn = next_nonnote_nondebug_insn (c_insn);
11377   if (c_insn && NONJUMP_INSN_P (c_insn))
11378     {
11379       rtx pat = PATTERN (c_insn);
11380       if (GET_CODE (pat) == PARALLEL)
11381 	{
11382 	  rtx vec = XVECEXP (pat, 0, 0);
11383 	  if (GET_CODE (vec) == SET
11384 	      && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11385 	      && REGNO (XEXP (vec, 0)) == REGNO (reg))
11386 	    delete_insn (c_insn);
11387 	}
11388     }
11389 }
11390 
11391 /* Expand the prologue into a bunch of separate insns.  */
11392 
11393 void
11394 ix86_expand_prologue (void)
11395 {
11396   struct machine_function *m = cfun->machine;
11397   rtx insn, t;
11398   struct ix86_frame frame;
11399   HOST_WIDE_INT allocate;
11400   bool int_registers_saved;
11401   bool sse_registers_saved;
11402 
11403   ix86_finalize_stack_realign_flags ();
11404 
11405   /* DRAP should not coexist with stack_realign_fp.  */
11406   gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11407 
11408   memset (&m->fs, 0, sizeof (m->fs));
11409 
11410   /* Initialize CFA state for before the prologue.  */
11411   m->fs.cfa_reg = stack_pointer_rtx;
11412   m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11413 
11414   /* Track SP offset to the CFA.  We continue tracking this after we've
11415      swapped the CFA register away from SP.  In the case of re-alignment
11416      this is fudged; we're interested in offsets within the local frame.  */
11417   m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11418   m->fs.sp_valid = true;
11419 
11420   ix86_compute_frame_layout (&frame);
11421 
11422   if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11423     {
11424       /* We should have already generated an error for any use of
11425          ms_hook on a nested function.  */
11426       gcc_checking_assert (!ix86_static_chain_on_stack);
11427 
11428       /* Check if profiling is active and we shall use the
11429          profiling-before-prologue variant.  If so, sorry.  */
11430       if (crtl->profile && flag_fentry != 0)
11431         sorry ("ms_hook_prologue attribute isn%'t compatible "
11432 	       "with -mfentry for 32-bit");
11433 
11434       /* In ix86_asm_output_function_label we emitted:
11435 	 8b ff     movl.s %edi,%edi
11436 	 55        push   %ebp
11437 	 8b ec     movl.s %esp,%ebp
11438 
11439 	 This matches the hookable function prologue in Win32 API
11440 	 functions in Microsoft Windows XP Service Pack 2 and newer.
11441 	 Wine uses this to enable Windows apps to hook the Win32 API
11442 	 functions provided by Wine.
11443 
11444 	 What that means is that we've already set up the frame pointer.  */
11445 
11446       if (frame_pointer_needed
11447 	  && !(crtl->drap_reg && crtl->stack_realign_needed))
11448 	{
11449 	  rtx push, mov;
11450 
11451 	  /* We've decided to use the frame pointer already set up.
11452 	     Describe this to the unwinder by pretending that both
11453 	     push and mov insns happen right here.
11454 
11455 	     Putting the unwind info here at the end of the ms_hook
11456 	     is done so that we can make absolutely certain we get
11457 	     the required byte sequence at the start of the function,
11458 	     rather than relying on an assembler that can produce
11459 	     the exact encoding required.
11460 
11461 	     However it does mean (in the unpatched case) that we have
11462 	     a 1 insn window where the asynchronous unwind info is
11463 	     incorrect.  However, if we placed the unwind info at
11464 	     its correct location we would have incorrect unwind info
11465 	     in the patched case.  Which is probably all moot since
11466 	     I don't expect Wine generates dwarf2 unwind info for the
11467 	     system libraries that use this feature.  */
11468 
11469 	  insn = emit_insn (gen_blockage ());
11470 
11471 	  push = gen_push (hard_frame_pointer_rtx);
11472 	  mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
11473 			     stack_pointer_rtx);
11474 	  RTX_FRAME_RELATED_P (push) = 1;
11475 	  RTX_FRAME_RELATED_P (mov) = 1;
11476 
11477 	  RTX_FRAME_RELATED_P (insn) = 1;
11478 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11479 			gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11480 
11481 	  /* Note that gen_push incremented m->fs.cfa_offset, even
11482 	     though we didn't emit the push insn here.  */
11483 	  m->fs.cfa_reg = hard_frame_pointer_rtx;
11484 	  m->fs.fp_offset = m->fs.cfa_offset;
11485 	  m->fs.fp_valid = true;
11486 	}
11487       else
11488 	{
11489 	  /* The frame pointer is not needed so pop %ebp again.
11490 	     This leaves us with a pristine state.  */
11491 	  emit_insn (gen_pop (hard_frame_pointer_rtx));
11492 	}
11493     }
11494 
11495   /* The first insn of a function that accepts its static chain on the
11496      stack is to push the register that would be filled in by a direct
11497      call.  This insn will be skipped by the trampoline.  */
11498   else if (ix86_static_chain_on_stack)
11499     {
11500       insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11501       emit_insn (gen_blockage ());
11502 
11503       /* We don't want to interpret this push insn as a register save,
11504 	 only as a stack adjustment.  The real copy of the register as
11505 	 a save will be done later, if needed.  */
11506       t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11507       t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
11508       add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11509       RTX_FRAME_RELATED_P (insn) = 1;
11510     }
11511 
11512   /* Emit prologue code to adjust stack alignment and set up DRAP, in
11513      case DRAP is needed and stack realignment is really needed after
11514      reload.  */
11514   if (stack_realign_drap)
11515     {
11516       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11517 
11518       /* Only need to push the parameter pointer reg if it is not
11519 	 call-used, i.e. callee-saved.  */
11519       if (!call_used_regs[REGNO (crtl->drap_reg)])
11520 	{
11521 	  /* Push the arg pointer reg.  */
11522 	  insn = emit_insn (gen_push (crtl->drap_reg));
11523 	  RTX_FRAME_RELATED_P (insn) = 1;
11524 	}
11525 
11526       /* Grab the argument pointer.  */
11527       t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11528       insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11529       RTX_FRAME_RELATED_P (insn) = 1;
11530       m->fs.cfa_reg = crtl->drap_reg;
11531       m->fs.cfa_offset = 0;
11532 
11533       /* Align the stack.  */
11534       insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11535 					stack_pointer_rtx,
11536 					GEN_INT (-align_bytes)));
11537       RTX_FRAME_RELATED_P (insn) = 1;
11538 
11539       /* Replicate the return address on the stack so that return
11540 	 address can be reached via (argp - 1) slot.  This is needed
11541 	 to implement macro RETURN_ADDR_RTX and intrinsic function
11542 	 expand_builtin_return_addr etc.  */
11543       t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11544       t = gen_frame_mem (word_mode, t);
11545       insn = emit_insn (gen_push (t));
11546       RTX_FRAME_RELATED_P (insn) = 1;
11547 
11548       /* For the purposes of frame and register save area addressing,
11549 	 we've started over with a new frame.  */
11550       m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11551       m->fs.realigned = true;
11552     }
11553 
11554   int_registers_saved = (frame.nregs == 0);
11555   sse_registers_saved = (frame.nsseregs == 0);
11556 
11557   if (frame_pointer_needed && !m->fs.fp_valid)
11558     {
11559       /* Note: AT&T enter does NOT have reversed args.  Enter is probably
11560          slower on all targets.  Also sdb doesn't like it.  */
11561       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11562       RTX_FRAME_RELATED_P (insn) = 1;
11563 
11564       /* Push registers now, before setting the frame pointer
11565 	 on SEH target.  */
11566       if (!int_registers_saved
11567 	  && TARGET_SEH
11568 	  && !frame.save_regs_using_mov)
11569 	{
11570 	  ix86_emit_save_regs ();
11571 	  int_registers_saved = true;
11572 	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11573 	}
11574 
11575       if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11576 	{
11577 	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11578 	  RTX_FRAME_RELATED_P (insn) = 1;
11579 
11580 	  if (m->fs.cfa_reg == stack_pointer_rtx)
11581 	    m->fs.cfa_reg = hard_frame_pointer_rtx;
11582 	  m->fs.fp_offset = m->fs.sp_offset;
11583 	  m->fs.fp_valid = true;
11584 	}
11585     }
11586 
11587   if (!int_registers_saved)
11588     {
11589       /* If saving registers via PUSH, do so now.  */
11590       if (!frame.save_regs_using_mov)
11591 	{
11592 	  ix86_emit_save_regs ();
11593 	  int_registers_saved = true;
11594 	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11595 	}
11596 
11597       /* When using the red zone we may start register saving before
11598 	 allocating the stack frame, saving one cycle of the prologue.
11599 	 However, avoid doing this if we have to probe the stack; at least
11600 	 on x86_64 the stack probe can turn into a call that clobbers a
11601 	 red zone location.  */
11601       else if (ix86_using_red_zone ()
11602 	       && (! TARGET_STACK_PROBE
11603 		   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11604 	{
11605 	  ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11606 	  int_registers_saved = true;
11607 	}
11608     }
11609 
11610   if (stack_realign_fp)
11611     {
11612       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11613       gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11614 
11615       /* The computation of the size of the re-aligned stack frame means
11616 	 that we must allocate the size of the register save area before
11617 	 performing the actual alignment.  Otherwise we cannot guarantee
11618 	 that there's enough storage above the realignment point.  */
11619       if (m->fs.sp_offset != frame.sse_reg_save_offset)
11620         pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11621 				   GEN_INT (m->fs.sp_offset
11622 					    - frame.sse_reg_save_offset),
11623 				   -1, false);
11624 
11625       /* Align the stack.  */
11626       insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11627 					stack_pointer_rtx,
11628 					GEN_INT (-align_bytes)));
11629 
11630       /* For the purposes of register save area addressing, the stack
11631          pointer is no longer valid.  As for the value of sp_offset,
11632 	 see ix86_compute_frame_layout, which we need to match in order
11633 	 to pass verification of stack_pointer_offset at the end.  */
11634       m->fs.sp_offset = (m->fs.sp_offset + align_bytes - 1) & -align_bytes;
11635       m->fs.sp_valid = false;
11636     }
11637 
11638   allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11639 
11640   if (flag_stack_usage_info)
11641     {
11642       /* We start to count from ARG_POINTER.  */
11643       HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11644 
11645       /* If it was realigned, take into account the fake frame.  */
11646       if (stack_realign_drap)
11647 	{
11648 	  if (ix86_static_chain_on_stack)
11649 	    stack_size += UNITS_PER_WORD;
11650 
11651 	  if (!call_used_regs[REGNO (crtl->drap_reg)])
11652 	    stack_size += UNITS_PER_WORD;
11653 
11654 	  /* This over-estimates by 1 minimal-stack-alignment-unit but
11655 	     mitigates that by counting in the new return address slot.  */
11656 	  current_function_dynamic_stack_size
11657 	    += crtl->stack_alignment_needed / BITS_PER_UNIT;
11658 	}
11659 
11660       current_function_static_stack_size = stack_size;
11661     }
11662 
11663   /* On SEH target with very large frame size, allocate an area to save
11664      SSE registers (as the very large allocation won't be described).  */
11665   if (TARGET_SEH
11666       && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11667       && !sse_registers_saved)
11668     {
11669       HOST_WIDE_INT sse_size =
11670 	frame.sse_reg_save_offset - frame.reg_save_offset;
11671 
11672       gcc_assert (int_registers_saved);
11673 
11674       /* No need to do stack checking as the area will be immediately
11675 	 written.  */
11676       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11677 			         GEN_INT (-sse_size), -1,
11678 				 m->fs.cfa_reg == stack_pointer_rtx);
11679       allocate -= sse_size;
11680       ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11681       sse_registers_saved = true;
11682     }
11683 
11684   /* The stack has already been decremented by the instruction calling
11685      us, so probe if the size is non-negative to preserve the protection
11686      area.  */
11686   if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11687     {
11688       /* We expect the registers to be saved when probes are used.  */
11689       gcc_assert (int_registers_saved);
11690 
11691       if (STACK_CHECK_MOVING_SP)
11692 	{
11693 	  if (!(crtl->is_leaf && !cfun->calls_alloca
11694 		&& allocate <= PROBE_INTERVAL))
11695 	    {
11696 	      ix86_adjust_stack_and_probe (allocate);
11697 	      allocate = 0;
11698 	    }
11699 	}
11700       else
11701 	{
11702 	  HOST_WIDE_INT size = allocate;
11703 
11704 	  if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11705 	    size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11706 
11707 	  if (TARGET_STACK_PROBE)
11708 	    {
11709 	      if (crtl->is_leaf && !cfun->calls_alloca)
11710 		{
11711 		  if (size > PROBE_INTERVAL)
11712 		    ix86_emit_probe_stack_range (0, size);
11713 		}
11714 	      else
11715 		ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11716 	    }
11717 	  else
11718 	    {
11719 	      if (crtl->is_leaf && !cfun->calls_alloca)
11720 		{
11721 		  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11722 		    ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11723 						 size - STACK_CHECK_PROTECT);
11724 		}
11725 	      else
11726 		ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11727 	    }
11728 	}
11729     }
11730 
11731   if (allocate == 0)
11732     ;
11733   else if (!ix86_target_stack_probe ()
11734 	   || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11735     {
11736       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11737 			         GEN_INT (-allocate), -1,
11738 			         m->fs.cfa_reg == stack_pointer_rtx);
11739     }
11740   else
11741     {
11742       rtx eax = gen_rtx_REG (Pmode, AX_REG);
11743       rtx r10 = NULL;
11744       rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11745       const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11746       bool eax_live = ix86_eax_live_at_start_p ();
11747       bool r10_live = false;
11748 
11749       if (TARGET_64BIT)
11750         r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11751 
11752       if (eax_live)
11753 	{
11754 	  insn = emit_insn (gen_push (eax));
11755 	  allocate -= UNITS_PER_WORD;
11756 	  /* Note that SEH directives need to continue tracking the stack
11757 	     pointer even after the frame pointer has been set up.  */
11758 	  if (sp_is_cfa_reg || TARGET_SEH)
11759 	    {
11760 	      if (sp_is_cfa_reg)
11761 		m->fs.cfa_offset += UNITS_PER_WORD;
11762 	      RTX_FRAME_RELATED_P (insn) = 1;
11763 	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11764 			    gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11765 					 plus_constant (Pmode, stack_pointer_rtx,
11766 							-UNITS_PER_WORD)));
11767 	    }
11768 	}
11769 
11770       if (r10_live)
11771 	{
11772 	  r10 = gen_rtx_REG (Pmode, R10_REG);
11773 	  insn = emit_insn (gen_push (r10));
11774 	  allocate -= UNITS_PER_WORD;
11775 	  if (sp_is_cfa_reg || TARGET_SEH)
11776 	    {
11777 	      if (sp_is_cfa_reg)
11778 		m->fs.cfa_offset += UNITS_PER_WORD;
11779 	      RTX_FRAME_RELATED_P (insn) = 1;
11780 	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11781 			    gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11782 					 plus_constant (Pmode, stack_pointer_rtx,
11783 							-UNITS_PER_WORD)));
11784 	    }
11785 	}
11786 
11787       emit_move_insn (eax, GEN_INT (allocate));
11788       emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11789 
11790       /* Use the fact that AX still contains ALLOCATE.  */
11791       adjust_stack_insn = (Pmode == DImode
11792 			   ? gen_pro_epilogue_adjust_stack_di_sub
11793 			   : gen_pro_epilogue_adjust_stack_si_sub);
11794 
11795       insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11796 					   stack_pointer_rtx, eax));
11797 
11798       if (sp_is_cfa_reg || TARGET_SEH)
11799 	{
11800 	  if (sp_is_cfa_reg)
11801 	    m->fs.cfa_offset += allocate;
11802 	  RTX_FRAME_RELATED_P (insn) = 1;
11803 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11804 			gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11805 				     plus_constant (Pmode, stack_pointer_rtx,
11806 						    -allocate)));
11807 	}
11808       m->fs.sp_offset += allocate;
11809 
11810       /* Use stack_pointer_rtx for relative addressing so that code
11811 	 works for realigned stack, too.  */
11812       if (r10_live && eax_live)
11813         {
11814 	  t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11815 	  emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11816 			  gen_frame_mem (word_mode, t));
11817 	  t = plus_constant (Pmode, t, UNITS_PER_WORD);
11818 	  emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11819 			  gen_frame_mem (word_mode, t));
11820 	}
11821       else if (eax_live || r10_live)
11822 	{
11823 	  t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11824 	  emit_move_insn (gen_rtx_REG (word_mode,
11825 				       (eax_live ? AX_REG : R10_REG)),
11826 			  gen_frame_mem (word_mode, t));
11827 	}
11828     }
11829   gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11830 
11831   /* If we haven't already set up the frame pointer, do so now.  */
11832   if (frame_pointer_needed && !m->fs.fp_valid)
11833     {
11834       insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11835 			    GEN_INT (frame.stack_pointer_offset
11836 				     - frame.hard_frame_pointer_offset));
11837       insn = emit_insn (insn);
11838       RTX_FRAME_RELATED_P (insn) = 1;
11839       add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11840 
11841       if (m->fs.cfa_reg == stack_pointer_rtx)
11842 	m->fs.cfa_reg = hard_frame_pointer_rtx;
11843       m->fs.fp_offset = frame.hard_frame_pointer_offset;
11844       m->fs.fp_valid = true;
11845     }
11846 
11847   if (!int_registers_saved)
11848     ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11849   if (!sse_registers_saved)
11850     ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11851 
11852   /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
11853      in the prologue.  */
11854   if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11855     {
11856       rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11857       insn = emit_insn (gen_set_got (pic));
11858       RTX_FRAME_RELATED_P (insn) = 1;
11859       add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11860       emit_insn (gen_prologue_use (pic));
11861       /* Delete an already emitted SET_GOT if it exists and is allocated
11862 	 to REAL_PIC_OFFSET_TABLE_REGNUM.  */
11863       ix86_elim_entry_set_got (pic);
11864     }
11865 
11866   if (crtl->drap_reg && !crtl->stack_realign_needed)
11867     {
11868       /* vDRAP is set up, but after reload it turns out stack realignment
11869          isn't necessary; here we emit prologue code to set up DRAP
11870          without the stack realignment adjustment.  */
11871       t = choose_baseaddr (0);
11872       emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11873     }
11874 
11875   /* Prevent instructions from being scheduled into register save push
11876      sequence when access to the redzone area is done through frame pointer.
11877      The offset between the frame pointer and the stack pointer is calculated
11878      relative to the value of the stack pointer at the end of the function
11879      prologue, and moving instructions that access redzone area via frame
11880      pointer inside push sequence violates this assumption.  */
11881   if (frame_pointer_needed && frame.red_zone_size)
11882     emit_insn (gen_memory_blockage ());
11883 
11884   /* Emit cld instruction if stringops are used in the function.  */
11885   if (TARGET_CLD && ix86_current_function_needs_cld)
11886     emit_insn (gen_cld ());
11887 
11888   /* SEH requires that the prologue end within 256 bytes of the start of
11889      the function.  Prevent instruction schedules that would extend that.
11890      Further, prevent alloca modifications to the stack pointer from being
11891      combined with prologue modifications.  */
11892   if (TARGET_SEH)
11893     emit_insn (gen_prologue_use (stack_pointer_rtx));
11894 }
11895 
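/* Pulling the pieces together, a hedged sketch of the code emitted for
   an ordinary 64-bit function that needs a frame pointer, saves %rbx
   via push and allocates a small frame (no DRAP, no probes, no SEH):

	pushq	%rbp
	movq	%rsp, %rbp
	pushq	%rbx
	subq	$24, %rsp

   with the CFA register migrating from %rsp to %rbp after the second
   insn, exactly as tracked in m->fs above.  */
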
11896 /* Emit code to restore REG using a POP insn.  */
11897 
11898 static void
11899 ix86_emit_restore_reg_using_pop (rtx reg)
11900 {
11901   struct machine_function *m = cfun->machine;
11902   rtx insn = emit_insn (gen_pop (reg));
11903 
11904   ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11905   m->fs.sp_offset -= UNITS_PER_WORD;
11906 
11907   if (m->fs.cfa_reg == crtl->drap_reg
11908       && REGNO (reg) == REGNO (crtl->drap_reg))
11909     {
11910       /* Previously we'd represented the CFA as an expression
11911 	 like *(%ebp - 8).  We've just popped that value from
11912 	 the stack, which means we need to reset the CFA to
11913 	 the drap register.  This will remain until we restore
11914 	 the stack pointer.  */
11915       add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11916       RTX_FRAME_RELATED_P (insn) = 1;
11917 
11918       /* This means that the DRAP register is valid for addressing too.  */
11919       m->fs.drap_valid = true;
11920       return;
11921     }
11922 
11923   if (m->fs.cfa_reg == stack_pointer_rtx)
11924     {
11925       rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11926       x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11927       add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11928       RTX_FRAME_RELATED_P (insn) = 1;
11929 
11930       m->fs.cfa_offset -= UNITS_PER_WORD;
11931     }
11932 
11933   /* When the frame pointer is the CFA, and we pop it, we are
11934      swapping back to the stack pointer as the CFA.  This happens
11935      for stack frames that don't allocate other data, so we assume
11936      the stack pointer is now pointing at the return address, i.e.
11937      the function entry state, which makes the offset one word.  */
11938   if (reg == hard_frame_pointer_rtx)
11939     {
11940       m->fs.fp_valid = false;
11941       if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11942 	{
11943 	  m->fs.cfa_reg = stack_pointer_rtx;
11944 	  m->fs.cfa_offset -= UNITS_PER_WORD;
11945 
11946 	  add_reg_note (insn, REG_CFA_DEF_CFA,
11947 			gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11948 				      GEN_INT (m->fs.cfa_offset)));
11949 	  RTX_FRAME_RELATED_P (insn) = 1;
11950 	}
11951     }
11952 }
11953 
11954 /* Emit code to restore saved registers using POP insns.  */
11955 
11956 static void
11957 ix86_emit_restore_regs_using_pop (void)
11958 {
11959   unsigned int regno;
11960 
11961   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11962     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11963       ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11964 }
11965 
11966 /* Emit code and notes for the LEAVE instruction.  */
11967 
11968 static void
11969 ix86_emit_leave (void)
11970 {
11971   struct machine_function *m = cfun->machine;
11972   rtx insn = emit_insn (ix86_gen_leave ());
11973 
11974   ix86_add_queued_cfa_restore_notes (insn);
11975 
11976   gcc_assert (m->fs.fp_valid);
11977   m->fs.sp_valid = true;
11978   m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11979   m->fs.fp_valid = false;
11980 
11981   if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11982     {
11983       m->fs.cfa_reg = stack_pointer_rtx;
11984       m->fs.cfa_offset = m->fs.sp_offset;
11985 
11986       add_reg_note (insn, REG_CFA_DEF_CFA,
11987 		    plus_constant (Pmode, stack_pointer_rtx,
11988 				   m->fs.sp_offset));
11989       RTX_FRAME_RELATED_P (insn) = 1;
11990     }
11991   ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11992 			     m->fs.fp_offset);
11993 }
11994 
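/* For reference, "leave" behaves like

	movq	%rbp, %rsp
	popq	%rbp

   which is why sp_offset becomes fp_offset - UNITS_PER_WORD and why
   the CFA, if it lived in the frame pointer, moves back to the stack
   pointer above.  */
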
11995 /* Emit code to restore saved registers using MOV insns.
11996    First register is restored from CFA - CFA_OFFSET.  */
11997 static void
11998 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11999 				  bool maybe_eh_return)
12000 {
12001   struct machine_function *m = cfun->machine;
12002   unsigned int regno;
12003 
12004   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12005     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
12006       {
12007 	rtx reg = gen_rtx_REG (word_mode, regno);
12008 	rtx insn, mem;
12009 
12010 	mem = choose_baseaddr (cfa_offset);
12011 	mem = gen_frame_mem (word_mode, mem);
12012 	insn = emit_move_insn (reg, mem);
12013 
12014         if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
12015 	  {
12016 	    /* Previously we'd represented the CFA as an expression
12017 	       like *(%ebp - 8).  We've just loaded that value from
12018 	       the stack, which means we need to reset the CFA to
12019 	       the drap register.  This will remain until we restore
12020 	       the stack pointer.  */
12021 	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
12022 	    RTX_FRAME_RELATED_P (insn) = 1;
12023 
12024 	    /* This means that the DRAP register is valid for addressing.  */
12025 	    m->fs.drap_valid = true;
12026 	  }
12027 	else
12028 	  ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
12029 
12030 	cfa_offset -= UNITS_PER_WORD;
12031       }
12032 }
12033 
12034 /* Emit code to restore saved SSE registers using MOV insns.
12035    First register is restored from CFA - CFA_OFFSET.  */
12036 static void
12037 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
12038 				      bool maybe_eh_return)
12039 {
12040   unsigned int regno;
12041 
12042   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12043     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
12044       {
12045 	rtx reg = gen_rtx_REG (V4SFmode, regno);
12046 	rtx mem;
12047  	unsigned int align;
12048 
12049 	mem = choose_baseaddr (cfa_offset);
12050 	mem = gen_rtx_MEM (V4SFmode, mem);
12051 
12052  	/* The location is aligned up to INCOMING_STACK_BOUNDARY.  */
12053 	align = MIN (GET_MODE_ALIGNMENT (V4SFmode), INCOMING_STACK_BOUNDARY);
12054  	set_mem_align (mem, align);
12055 
12056  	/* SSE saves are not within the re-aligned local stack frame.
12057  	   In case INCOMING_STACK_BOUNDARY is below the 128-bit SSE
12058  	   alignment, we have to emit an unaligned load.  */
12059  	if (align < 128)
12060  	  {
12061  	    rtx unspec = gen_rtx_UNSPEC (V4SFmode, gen_rtvec (1, mem),
12062  					 UNSPEC_LOADU);
12063  	    emit_insn (gen_rtx_SET (VOIDmode, reg, unspec));
12064  	  }
12065  	else
12066  	  emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
12067 
12068 	ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
12069 
12070 	cfa_offset -= 16;
12071       }
12072 }
12073 
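/* An illustration of the aligned/unaligned split above (assumed
   offsets): with a 128-bit INCOMING_STACK_BOUNDARY the restore is a
   plain aligned load such as

	movaps	-48(%rbp), %xmm6

   whereas a word-aligned incoming stack takes the UNSPEC_LOADU path,
   i.e. a movups-style unaligned load.  */
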
12074 /* Restore function stack, frame, and registers.  */
12075 
12076 void
12077 ix86_expand_epilogue (int style)
12078 {
12079   struct machine_function *m = cfun->machine;
12080   struct machine_frame_state frame_state_save = m->fs;
12081   struct ix86_frame frame;
12082   bool restore_regs_via_mov;
12083   bool using_drap;
12084 
12085   ix86_finalize_stack_realign_flags ();
12086   ix86_compute_frame_layout (&frame);
12087 
12088   m->fs.sp_valid = (!frame_pointer_needed
12089 		    || (crtl->sp_is_unchanging
12090 			&& !stack_realign_fp));
12091   gcc_assert (!m->fs.sp_valid
12092 	      || m->fs.sp_offset == frame.stack_pointer_offset);
12093 
12094   /* The FP must be valid if the frame pointer is present.  */
12095   gcc_assert (frame_pointer_needed == m->fs.fp_valid);
12096   gcc_assert (!m->fs.fp_valid
12097 	      || m->fs.fp_offset == frame.hard_frame_pointer_offset);
12098 
12099   /* We must have *some* valid pointer to the stack frame.  */
12100   gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
12101 
12102   /* The DRAP is never valid at this point.  */
12103   gcc_assert (!m->fs.drap_valid);
12104 
12105   /* See the comment about red zone and frame
12106      pointer usage in ix86_expand_prologue.  */
12107   if (frame_pointer_needed && frame.red_zone_size)
12108     emit_insn (gen_memory_blockage ());
12109 
12110   using_drap = crtl->drap_reg && crtl->stack_realign_needed;
12111   gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
12112 
12113   /* Determine the CFA offset of the end of the red-zone.  */
12114   m->fs.red_zone_offset = 0;
12115   if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
12116     {
12117       /* The red-zone begins below the return address.  */
12118       m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
12119 
12120       /* When the register save area is in the aligned portion of
12121          the stack, determine the maximum runtime displacement that
12122 	 matches up with the aligned frame.  */
12123       if (stack_realign_drap)
12124 	m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
12125 				  + UNITS_PER_WORD);
12126     }
12127 
12128   /* Special care must be taken for the normal return case of a function
12129      using eh_return: the eax and edx registers are marked as saved, but
12130      not restored along this path.  Adjust the save location to match.  */
12131   if (crtl->calls_eh_return && style != 2)
12132     frame.reg_save_offset -= 2 * UNITS_PER_WORD;
12133 
12134   /* EH_RETURN requires the use of moves to function properly.  */
12135   if (crtl->calls_eh_return)
12136     restore_regs_via_mov = true;
12137   /* SEH requires the use of pops to identify the epilogue.  */
12138   else if (TARGET_SEH)
12139     restore_regs_via_mov = false;
12140   /* If we're only restoring one register and sp is not valid, then
12141      use a move instruction to restore the register, since it's
12142      less work than reloading sp and popping the register.  */
12143   else if (!m->fs.sp_valid && frame.nregs <= 1)
12144     restore_regs_via_mov = true;
12145   else if (TARGET_EPILOGUE_USING_MOVE
12146 	   && cfun->machine->use_fast_prologue_epilogue
12147 	   && (frame.nregs > 1
12148 	       || m->fs.sp_offset != frame.reg_save_offset))
12149     restore_regs_via_mov = true;
12150   else if (frame_pointer_needed
12151 	   && !frame.nregs
12152 	   && m->fs.sp_offset != frame.reg_save_offset)
12153     restore_regs_via_mov = true;
12154   else if (frame_pointer_needed
12155 	   && TARGET_USE_LEAVE
12156 	   && cfun->machine->use_fast_prologue_epilogue
12157 	   && frame.nregs == 1)
12158     restore_regs_via_mov = true;
12159   else
12160     restore_regs_via_mov = false;
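
  /* For illustration: when restore_regs_via_mov is false, the code
     below typically ends the function with a plain pop sequence,
     roughly

	 popq %rbx
	 popq %rbp
	 ret

     whereas the mov path reloads each register from its save slot and
     releases the whole frame with a single stack-pointer adjustment.  */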
12161 
12162   if (restore_regs_via_mov || frame.nsseregs)
12163     {
12164       /* Ensure that the entire register save area is addressable via
12165 	 the stack pointer, if we will restore via sp.  */
12166       if (TARGET_64BIT
12167 	  && m->fs.sp_offset > 0x7fffffff
12168 	  && !(m->fs.fp_valid || m->fs.drap_valid)
12169 	  && (frame.nsseregs + frame.nregs) != 0)
12170 	{
12171 	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12172 				     GEN_INT (m->fs.sp_offset
12173 					      - frame.sse_reg_save_offset),
12174 				     style,
12175 				     m->fs.cfa_reg == stack_pointer_rtx);
12176 	}
12177     }
12178 
12179   /* If there are any SSE registers to restore, then we have to do it
12180      via moves, since there's obviously no pop for SSE regs.  */
12181   if (frame.nsseregs)
12182     ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
12183 					  style == 2);
12184 
12185   if (restore_regs_via_mov)
12186     {
12187       rtx t;
12188 
12189       if (frame.nregs)
12190 	ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
12191 
12192       /* eh_return epilogues need %ecx added to the stack pointer.  */
12193       if (style == 2)
12194 	{
12195 	  rtx insn, sa = EH_RETURN_STACKADJ_RTX;
12196 
12197 	  /* Stack align doesn't work with eh_return.  */
12198 	  gcc_assert (!stack_realign_drap);
12199 	  /* Neither do regparm nested functions.  */
12200 	  gcc_assert (!ix86_static_chain_on_stack);
12201 
12202 	  if (frame_pointer_needed)
12203 	    {
12204 	      t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
12205 	      t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
12206 	      emit_insn (gen_rtx_SET (VOIDmode, sa, t));
12207 
12208 	      t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
12209 	      insn = emit_move_insn (hard_frame_pointer_rtx, t);
12210 
12211 	      /* Note that we use SA as a temporary CFA, as the return
12212 		 address is at the proper place relative to it.  We
12213 		 pretend this happens at the FP restore insn because
12214 		 prior to this insn the FP would be stored at the wrong
12215 		 offset relative to SA, and after this insn we have no
12216 		 other reasonable register to use for the CFA.  We don't
12217 		 bother resetting the CFA to the SP for the duration of
12218 		 the return insn.  */
12219 	      add_reg_note (insn, REG_CFA_DEF_CFA,
12220 			    plus_constant (Pmode, sa, UNITS_PER_WORD));
12221 	      ix86_add_queued_cfa_restore_notes (insn);
12222 	      add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
12223 	      RTX_FRAME_RELATED_P (insn) = 1;
12224 
12225 	      m->fs.cfa_reg = sa;
12226 	      m->fs.cfa_offset = UNITS_PER_WORD;
12227 	      m->fs.fp_valid = false;
12228 
12229 	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
12230 					 const0_rtx, style, false);
12231 	    }
12232 	  else
12233 	    {
12234 	      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
12235 	      t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
12236 	      insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
12237 	      ix86_add_queued_cfa_restore_notes (insn);
12238 
12239 	      gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12240 	      if (m->fs.cfa_offset != UNITS_PER_WORD)
12241 		{
12242 		  m->fs.cfa_offset = UNITS_PER_WORD;
12243 		  add_reg_note (insn, REG_CFA_DEF_CFA,
12244 				plus_constant (Pmode, stack_pointer_rtx,
12245 					       UNITS_PER_WORD));
12246 		  RTX_FRAME_RELATED_P (insn) = 1;
12247 		}
12248 	    }
12249 	  m->fs.sp_offset = UNITS_PER_WORD;
12250 	  m->fs.sp_valid = true;
12251 	}
12252     }
12253   else
12254     {
12255       /* SEH requires that the function end with (1) a stack adjustment
12256 	 if necessary, (2) a sequence of pops, and (3) a return or
12257 	 jump instruction.  Prevent insns from the function body from
12258 	 being scheduled into this sequence.  */
12259       if (TARGET_SEH)
12260 	{
12261 	  /* Prevent a catch region from being adjacent to the standard
12262 	     epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda
12263 	     nor several other flags that would be interesting to test
12264 	     are set up yet.  */
12265 	  if (flag_non_call_exceptions)
12266 	    emit_insn (gen_nops (const1_rtx));
12267 	  else
12268 	    emit_insn (gen_blockage ());
12269 	}
12270 
12271       /* First step is to deallocate the stack frame so that we can
12272 	 pop the registers.  Also do it on SEH targets for very large
12273 	 frames, as the emitted instructions aren't allowed by the ABI in
12274 	 epilogues.  */
12275       if (!m->fs.sp_valid
12276  	  || (TARGET_SEH
12277 	      && (m->fs.sp_offset - frame.reg_save_offset
12278 		  >= SEH_MAX_FRAME_SIZE)))
12279 	{
12280 	  pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12281 				     GEN_INT (m->fs.fp_offset
12282 					      - frame.reg_save_offset),
12283 				     style, false);
12284 	}
12285       else if (m->fs.sp_offset != frame.reg_save_offset)
12286 	{
12287 	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12288 				     GEN_INT (m->fs.sp_offset
12289 					      - frame.reg_save_offset),
12290 				     style,
12291 				     m->fs.cfa_reg == stack_pointer_rtx);
12292 	}
12293 
12294       ix86_emit_restore_regs_using_pop ();
12295     }
12296 
12297   /* If we used a frame pointer and haven't already got rid of it,
12298      then do so now.  */
12299   if (m->fs.fp_valid)
12300     {
12301       /* If the stack pointer is valid and pointing at the frame
12302 	 pointer store address, then we only need a pop.  */
12303       if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12304 	ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12305       /* Leave results in shorter dependency chains on CPUs that are
12306 	 able to grok it fast.  */
12307       else if (TARGET_USE_LEAVE
12308 	       || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12309 	       || !cfun->machine->use_fast_prologue_epilogue)
12310 	ix86_emit_leave ();
12311       else
12312         {
12313 	  pro_epilogue_adjust_stack (stack_pointer_rtx,
12314 				     hard_frame_pointer_rtx,
12315 				     const0_rtx, style, !using_drap);
12316 	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12317         }
12318     }
12319 
12320   if (using_drap)
12321     {
12322       int param_ptr_offset = UNITS_PER_WORD;
12323       rtx insn;
12324 
12325       gcc_assert (stack_realign_drap);
12326 
12327       if (ix86_static_chain_on_stack)
12328 	param_ptr_offset += UNITS_PER_WORD;
12329       if (!call_used_regs[REGNO (crtl->drap_reg)])
12330 	param_ptr_offset += UNITS_PER_WORD;
12331 
12332       insn = emit_insn (gen_rtx_SET
12333 			(VOIDmode, stack_pointer_rtx,
12334 			 gen_rtx_PLUS (Pmode,
12335 				       crtl->drap_reg,
12336 				       GEN_INT (-param_ptr_offset))));
12337       m->fs.cfa_reg = stack_pointer_rtx;
12338       m->fs.cfa_offset = param_ptr_offset;
12339       m->fs.sp_offset = param_ptr_offset;
12340       m->fs.realigned = false;
12341 
12342       add_reg_note (insn, REG_CFA_DEF_CFA,
12343 		    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12344 				  GEN_INT (param_ptr_offset)));
12345       RTX_FRAME_RELATED_P (insn) = 1;
12346 
12347       if (!call_used_regs[REGNO (crtl->drap_reg)])
12348 	ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12349     }
12350 
12351   /* At this point the stack pointer must be valid, and we must have
12352      restored all of the registers.  We may not have deallocated the
12353      entire stack frame.  We've delayed this until now because it may
12354      be possible to merge the local stack deallocation with the
12355      deallocation forced by ix86_static_chain_on_stack.   */
12356   gcc_assert (m->fs.sp_valid);
12357   gcc_assert (!m->fs.fp_valid);
12358   gcc_assert (!m->fs.realigned);
12359   if (m->fs.sp_offset != UNITS_PER_WORD)
12360     {
12361       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12362 				 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12363 				 style, true);
12364     }
12365   else
12366     ix86_add_queued_cfa_restore_notes (get_last_insn ());
12367 
12368   /* Sibcall epilogues don't want a return instruction.  */
12369   if (style == 0)
12370     {
12371       m->fs = frame_state_save;
12372       return;
12373     }
12374 
12375   if (crtl->args.pops_args && crtl->args.size)
12376     {
12377       rtx popc = GEN_INT (crtl->args.pops_args);
12378 
12379       /* i386 can only pop 64K bytes.  If asked to pop more, pop return
12380 	 address, do explicit add, and jump indirectly to the caller.  */
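
      /* A sketch of the emitted sequence (N is hypothetical; the real
	 pop count is crtl->args.pops_args):

	     popl %ecx		# return address into %ecx
	     addl $N, %esp	# explicitly pop the argument bytes
	     jmp  *%ecx		# return to the caller

	 since "ret $imm16" can only encode a 16-bit pop count.  */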
12381 
12382       if (crtl->args.pops_args >= 65536)
12383 	{
12384 	  rtx ecx = gen_rtx_REG (SImode, CX_REG);
12385 	  rtx insn;
12386 
12387 	  /* There is no "pascal" calling convention in any 64bit ABI.  */
12388 	  gcc_assert (!TARGET_64BIT);
12389 
12390 	  insn = emit_insn (gen_pop (ecx));
12391 	  m->fs.cfa_offset -= UNITS_PER_WORD;
12392 	  m->fs.sp_offset -= UNITS_PER_WORD;
12393 
12394 	  rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12395 	  x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
12396 	  add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12397 	  add_reg_note (insn, REG_CFA_REGISTER,
12398 			gen_rtx_SET (VOIDmode, ecx, pc_rtx));
12399 	  RTX_FRAME_RELATED_P (insn) = 1;
12400 
12401 	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12402 				     popc, -1, true);
12403 	  emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12404 	}
12405       else
12406 	emit_jump_insn (gen_simple_return_pop_internal (popc));
12407     }
12408   else
12409     emit_jump_insn (gen_simple_return_internal ());
12410 
12411   /* Restore the state back to the state from the prologue,
12412      so that it's correct for the next epilogue.  */
12413   m->fs = frame_state_save;
12414 }
12415 
12416 /* Reset state that compiling the function may have modified.  */
12417 
12418 static void
12419 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12420 {
12421   if (pic_offset_table_rtx
12422       && !ix86_use_pseudo_pic_reg ())
12423     SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12424 #if TARGET_MACHO
12425   /* Mach-O doesn't support labels at the end of objects, so if
12426      it looks like we might want one, insert a NOP.  */
12427   {
12428     rtx_insn *insn = get_last_insn ();
12429     rtx_insn *deleted_debug_label = NULL;
12430     while (insn
12431 	   && NOTE_P (insn)
12432 	   && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12433       {
12434 	/* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
12435 	   notes only, instead set their CODE_LABEL_NUMBER to -1,
12436 	   otherwise there would be code generation differences
12437 	   between -g and -g0.  */
12438 	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12439 	  deleted_debug_label = insn;
12440 	insn = PREV_INSN (insn);
12441       }
12442     if (insn
12443 	&& (LABEL_P (insn)
12444 	    || (NOTE_P (insn)
12445 		&& NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12446       fputs ("\tnop\n", file);
12447     else if (deleted_debug_label)
12448       for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12449 	if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12450 	  CODE_LABEL_NUMBER (insn) = -1;
12451   }
12452 #endif
12453 
12454 }
12455 
12456 /* Return a scratch register to use in the split stack prologue.  The
12457    split stack prologue is used for -fsplit-stack.  It consists of the
12458    first instructions in the function, even before the regular prologue.
12459    The scratch register can be any caller-saved register which is not
12460    used for parameters or for the static chain.  */
12461 
12462 static unsigned int
12463 split_stack_prologue_scratch_regno (void)
12464 {
12465   if (TARGET_64BIT)
12466     return R11_REG;
12467   else
12468     {
12469       bool is_fastcall, is_thiscall;
12470       int regparm;
12471 
12472       is_fastcall = (lookup_attribute ("fastcall",
12473 				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12474 		     != NULL);
12475       is_thiscall = (lookup_attribute ("thiscall",
12476 				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12477 		     != NULL);
12478       regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12479 
12480       if (is_fastcall)
12481 	{
12482 	  if (DECL_STATIC_CHAIN (cfun->decl))
12483 	    {
12484 	      sorry ("-fsplit-stack does not support fastcall with "
12485 		     "nested function");
12486 	      return INVALID_REGNUM;
12487 	    }
12488 	  return AX_REG;
12489 	}
12490       else if (is_thiscall)
12491         {
12492 	  if (!DECL_STATIC_CHAIN (cfun->decl))
12493 	    return DX_REG;
12494 	  return AX_REG;
12495 	}
12496       else if (regparm < 3)
12497 	{
12498 	  if (!DECL_STATIC_CHAIN (cfun->decl))
12499 	    return CX_REG;
12500 	  else
12501 	    {
12502 	      if (regparm >= 2)
12503 		{
12504 		  sorry ("-fsplit-stack does not support 2 register "
12505 			 "parameters for a nested function");
12506 		  return INVALID_REGNUM;
12507 		}
12508 	      return DX_REG;
12509 	    }
12510 	}
12511       else
12512 	{
12513 	  /* FIXME: We could make this work by pushing a register
12514 	     around the addition and comparison.  */
12515 	  sorry ("-fsplit-stack does not support 3 register parameters");
12516 	  return INVALID_REGNUM;
12517 	}
12518     }
12519 }
12520 
12521 /* A SYMBOL_REF for the function which allocates new stackspace for
12522 /* A SYMBOL_REF for the function which allocates new stack space for
12523 
12524 static GTY(()) rtx split_stack_fn;
12525 
12526 /* A SYMBOL_REF for the more-stack function when using the large
12527    model.  */
12528 
12529 static GTY(()) rtx split_stack_fn_large;
12530 
12531 /* Handle -fsplit-stack.  These are the first instructions in the
12532    function, even before the regular prologue.  */
12533 
12534 void
12535 ix86_expand_split_stack_prologue (void)
12536 {
12537   struct ix86_frame frame;
12538   HOST_WIDE_INT allocate;
12539   unsigned HOST_WIDE_INT args_size;
12540   rtx_code_label *label;
12541   rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12542   rtx scratch_reg = NULL_RTX;
12543   rtx_code_label *varargs_label = NULL;
12544   rtx fn;
12545 
12546   gcc_assert (flag_split_stack && reload_completed);
12547 
12548   ix86_finalize_stack_realign_flags ();
12549   ix86_compute_frame_layout (&frame);
12550   allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12551 
12552   /* This is the label we will branch to if we have enough stack
12553      space.  We expect the basic block reordering pass to reverse this
12554      branch if optimizing, so that we branch in the unlikely case.  */
12555   label = gen_label_rtx ();
12556 
12557   /* We need to compare the stack pointer minus the frame size with
12558      the stack boundary in the TCB.  The stack boundary always gives
12559      us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12560      can compare directly.  Otherwise we need to do an addition.  */
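
  /* For illustration, assuming a 64-bit target and a small frame, the
     comparison emitted below is conceptually

	 cmpq %fs:<guard>, %rsp
	 jae  .Lhave_enough_stack

     where <guard> stands for the target-specific TCB slot reached
     through the UNSPEC_STACK_CHECK address; larger frames first
     compute sp - frame_size into a scratch register and compare
     that instead.  */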
12561 
12562   limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12563 			  UNSPEC_STACK_CHECK);
12564   limit = gen_rtx_CONST (Pmode, limit);
12565   limit = gen_rtx_MEM (Pmode, limit);
12566   if (allocate < SPLIT_STACK_AVAILABLE)
12567     current = stack_pointer_rtx;
12568   else
12569     {
12570       unsigned int scratch_regno;
12571       rtx offset;
12572 
12573       /* We need a scratch register to hold the stack pointer minus
12574 	 the required frame size.  Since this is the very start of the
12575 	 function, the scratch register can be any caller-saved
12576 	 register which is not used for parameters.  */
12577       offset = GEN_INT (- allocate);
12578       scratch_regno = split_stack_prologue_scratch_regno ();
12579       if (scratch_regno == INVALID_REGNUM)
12580 	return;
12581       scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12582       if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12583 	{
12584 	  /* We don't use ix86_gen_add3 in this case because it will
12585 	     want to split to lea, but when not optimizing the insn
12586 	     will not be split after this point.  */
12587 	  emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12588 				  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12589 						offset)));
12590 	}
12591       else
12592 	{
12593 	  emit_move_insn (scratch_reg, offset);
12594 	  emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12595 				    stack_pointer_rtx));
12596 	}
12597       current = scratch_reg;
12598     }
12599 
12600   ix86_expand_branch (GEU, current, limit, label);
12601   jump_insn = get_last_insn ();
12602   JUMP_LABEL (jump_insn) = label;
12603 
12604   /* Mark the jump as very likely to be taken.  */
12605   add_int_reg_note (jump_insn, REG_BR_PROB,
12606 		    REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12607 
12608   if (split_stack_fn == NULL_RTX)
12609     {
12610       split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12611       SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12612     }
12613   fn = split_stack_fn;
12614 
12615   /* Get more stack space.  We pass in the desired stack space and the
12616      size of the arguments to copy to the new stack.  In 32-bit mode
12617      we push the parameters; __morestack will return on a new stack
12618      anyhow.  In 64-bit mode we pass the parameters in r10 and
12619      r11.  */
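
  /* A sketch of the 64-bit case, assuming the small code model:

	 movq $frame_size, %r10	# bytes of stack needed
	 movq $args_size,  %r11	# bytes of incoming arguments
	 call __morestack

     In 32-bit mode both values are pushed onto the stack instead.  */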
12620   allocate_rtx = GEN_INT (allocate);
12621   args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12622   call_fusage = NULL_RTX;
12623   if (TARGET_64BIT)
12624     {
12625       rtx reg10, reg11;
12626 
12627       reg10 = gen_rtx_REG (Pmode, R10_REG);
12628       reg11 = gen_rtx_REG (Pmode, R11_REG);
12629 
12630       /* If this function uses a static chain, it will be in %r10.
12631 	 Preserve it across the call to __morestack.  */
12632       if (DECL_STATIC_CHAIN (cfun->decl))
12633 	{
12634 	  rtx rax;
12635 
12636 	  rax = gen_rtx_REG (word_mode, AX_REG);
12637 	  emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12638 	  use_reg (&call_fusage, rax);
12639 	}
12640 
12641       if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12642           && !TARGET_PECOFF)
12643 	{
12644 	  HOST_WIDE_INT argval;
12645 
12646 	  gcc_assert (Pmode == DImode);
12647 	  /* When using the large model we need to load the address
12648 	     into a register, and we've run out of registers.  So we
12649 	     switch to a different calling convention, and we call a
12650 	     different function: __morestack_large.  We pass the
12651 	     argument size in the upper 32 bits of r10 and pass the
12652 	     frame size in the lower 32 bits.  */
12653 	  gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12654 	  gcc_assert ((args_size & 0xffffffff) == args_size);
12655 
12656 	  if (split_stack_fn_large == NULL_RTX)
12657 	    {
12658 	      split_stack_fn_large =
12659 	        gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12660 	      SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12661 	    }
12662 	  if (ix86_cmodel == CM_LARGE_PIC)
12663 	    {
12664 	      rtx_code_label *label;
12665 	      rtx x;
12666 
12667 	      label = gen_label_rtx ();
12668 	      emit_label (label);
12669 	      LABEL_PRESERVE_P (label) = 1;
12670 	      emit_insn (gen_set_rip_rex64 (reg10, label));
12671 	      emit_insn (gen_set_got_offset_rex64 (reg11, label));
12672 	      emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12673 	      x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12674 				  UNSPEC_GOT);
12675 	      x = gen_rtx_CONST (Pmode, x);
12676 	      emit_move_insn (reg11, x);
12677 	      x = gen_rtx_PLUS (Pmode, reg10, reg11);
12678 	      x = gen_const_mem (Pmode, x);
12679 	      emit_move_insn (reg11, x);
12680 	    }
12681 	  else
12682 	    emit_move_insn (reg11, split_stack_fn_large);
12683 
12684 	  fn = reg11;
12685 
12686 	  argval = ((args_size << 16) << 16) + allocate;
12687 	  emit_move_insn (reg10, GEN_INT (argval));
12688 	}
12689       else
12690 	{
12691 	  emit_move_insn (reg10, allocate_rtx);
12692 	  emit_move_insn (reg11, GEN_INT (args_size));
12693 	  use_reg (&call_fusage, reg11);
12694 	}
12695 
12696       use_reg (&call_fusage, reg10);
12697     }
12698   else
12699     {
12700       emit_insn (gen_push (GEN_INT (args_size)));
12701       emit_insn (gen_push (allocate_rtx));
12702     }
12703   call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12704 				GEN_INT (UNITS_PER_WORD), constm1_rtx,
12705 				NULL_RTX, false);
12706   add_function_usage_to (call_insn, call_fusage);
12707 
12708   /* In order to make call/return prediction work right, we now need
12709      to execute a return instruction.  See
12710      libgcc/config/i386/morestack.S for the details on how this works.
12711 
12712      For flow purposes gcc must not see this as a return
12713      instruction--we need control flow to continue at the subsequent
12714      label.  Therefore, we use an unspec.  */
12715   gcc_assert (crtl->args.pops_args < 65536);
12716   emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12717 
12718   /* If we are in 64-bit mode and this function uses a static chain,
12719      we saved %r10 in %rax before calling __morestack.  */
12720   if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12721     emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12722 		    gen_rtx_REG (word_mode, AX_REG));
12723 
12724   /* If this function calls va_start, we need to store a pointer to
12725      the arguments on the old stack, because they may not have been
12726      all copied to the new stack.  At this point the old stack can be
12727      found at the frame pointer value used by __morestack, because
12728      __morestack has set that up before calling back to us.  Here we
12729      store that pointer in a scratch register, and in
12730      ix86_expand_prologue we store the scratch register in a stack
12731      slot.  */
12732   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12733     {
12734       unsigned int scratch_regno;
12735       rtx frame_reg;
12736       int words;
12737 
12738       scratch_regno = split_stack_prologue_scratch_regno ();
12739       scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12740       frame_reg = gen_rtx_REG (Pmode, BP_REG);
12741 
12742       /* 64-bit:
12743 	 fp -> old fp value
12744 	       return address within this function
12745 	       return address of caller of this function
12746 	       stack arguments
12747 	 So we add three words to get to the stack arguments.
12748 
12749 	 32-bit:
12750 	 fp -> old fp value
12751 	       return address within this function
12752                first argument to __morestack
12753                second argument to __morestack
12754                return address of caller of this function
12755                stack arguments
12756          So we add five words to get to the stack arguments.
12757       */
12758       words = TARGET_64BIT ? 3 : 5;
12759       emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12760 			      gen_rtx_PLUS (Pmode, frame_reg,
12761 					    GEN_INT (words * UNITS_PER_WORD))));
12762 
12763       varargs_label = gen_label_rtx ();
12764       emit_jump_insn (gen_jump (varargs_label));
12765       JUMP_LABEL (get_last_insn ()) = varargs_label;
12766 
12767       emit_barrier ();
12768     }
12769 
12770   emit_label (label);
12771   LABEL_NUSES (label) = 1;
12772 
12773   /* If this function calls va_start, we now have to set the scratch
12774      register for the case where we do not call __morestack.  In this
12775      case we need to set it based on the stack pointer.  */
12776   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12777     {
12778       emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12779 			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12780 					    GEN_INT (UNITS_PER_WORD))));
12781 
12782       emit_label (varargs_label);
12783       LABEL_NUSES (varargs_label) = 1;
12784     }
12785 }
12786 
12787 /* We may have to tell the dataflow pass that the split stack prologue
12788    is initializing a scratch register.  */
12789 
12790 static void
12791 ix86_live_on_entry (bitmap regs)
12792 {
12793   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12794     {
12795       gcc_assert (flag_split_stack);
12796       bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12797     }
12798 }
12799 
12800 /* Extract the parts of an RTL expression that is a valid memory address
12801    for an instruction.  Return 0 if the structure of the address is
12802    grossly off.  Return -1 if the address contains ASHIFT, so it is not
12803    strictly valid, but still used for computing the length of the lea instruction.  */
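
/* For illustration, the (hypothetical) address

       (plus:SI (plus:SI (mult:SI (reg:SI B) (const_int 4))
			 (reg:SI A))
		(const_int 8))

   decomposes into base = A, index = B, scale = 4, disp = 8, i.e. the
   operand written as 8(%eax,%ebx,4) in AT&T syntax.  */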
12804 
12805 int
12806 ix86_decompose_address (rtx addr, struct ix86_address *out)
12807 {
12808   rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12809   rtx base_reg, index_reg;
12810   HOST_WIDE_INT scale = 1;
12811   rtx scale_rtx = NULL_RTX;
12812   rtx tmp;
12813   int retval = 1;
12814   enum ix86_address_seg seg = SEG_DEFAULT;
12815 
12816   /* Allow zero-extended SImode addresses;
12817      they will be emitted with the addr32 prefix.  */
12818   if (TARGET_64BIT && GET_MODE (addr) == DImode)
12819     {
12820       if (GET_CODE (addr) == ZERO_EXTEND
12821 	  && GET_MODE (XEXP (addr, 0)) == SImode)
12822 	{
12823 	  addr = XEXP (addr, 0);
12824 	  if (CONST_INT_P (addr))
12825 	    return 0;
12826 	}
12827       else if (GET_CODE (addr) == AND
12828 	       && const_32bit_mask (XEXP (addr, 1), DImode))
12829 	{
12830 	  addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12831 	  if (addr == NULL_RTX)
12832 	    return 0;
12833 
12834 	  if (CONST_INT_P (addr))
12835 	    return 0;
12836 	}
12837     }
12838 
12839   /* Allow SImode subregs of DImode addresses;
12840      they will be emitted with the addr32 prefix.  */
12841   if (TARGET_64BIT && GET_MODE (addr) == SImode)
12842     {
12843       if (GET_CODE (addr) == SUBREG
12844 	  && GET_MODE (SUBREG_REG (addr)) == DImode)
12845 	{
12846 	  addr = SUBREG_REG (addr);
12847 	  if (CONST_INT_P (addr))
12848 	    return 0;
12849 	}
12850     }
12851 
12852   if (REG_P (addr))
12853     base = addr;
12854   else if (GET_CODE (addr) == SUBREG)
12855     {
12856       if (REG_P (SUBREG_REG (addr)))
12857 	base = addr;
12858       else
12859 	return 0;
12860     }
12861   else if (GET_CODE (addr) == PLUS)
12862     {
12863       rtx addends[4], op;
12864       int n = 0, i;
12865 
12866       op = addr;
12867       do
12868 	{
12869 	  if (n >= 4)
12870 	    return 0;
12871 	  addends[n++] = XEXP (op, 1);
12872 	  op = XEXP (op, 0);
12873 	}
12874       while (GET_CODE (op) == PLUS);
12875       if (n >= 4)
12876 	return 0;
12877       addends[n] = op;
12878 
12879       for (i = n; i >= 0; --i)
12880 	{
12881 	  op = addends[i];
12882 	  switch (GET_CODE (op))
12883 	    {
12884 	    case MULT:
12885 	      if (index)
12886 		return 0;
12887 	      index = XEXP (op, 0);
12888 	      scale_rtx = XEXP (op, 1);
12889 	      break;
12890 
12891 	    case ASHIFT:
12892 	      if (index)
12893 		return 0;
12894 	      index = XEXP (op, 0);
12895 	      tmp = XEXP (op, 1);
12896 	      if (!CONST_INT_P (tmp))
12897 		return 0;
12898 	      scale = INTVAL (tmp);
12899 	      if ((unsigned HOST_WIDE_INT) scale > 3)
12900 		return 0;
12901 	      scale = 1 << scale;
12902 	      break;
12903 
12904 	    case ZERO_EXTEND:
12905 	      op = XEXP (op, 0);
12906 	      if (GET_CODE (op) != UNSPEC)
12907 		return 0;
12908 	      /* FALLTHRU */
12909 
12910 	    case UNSPEC:
12911 	      if (XINT (op, 1) == UNSPEC_TP
12912 	          && TARGET_TLS_DIRECT_SEG_REFS
12913 	          && seg == SEG_DEFAULT)
12914 		seg = DEFAULT_TLS_SEG_REG;
12915 	      else
12916 		return 0;
12917 	      break;
12918 
12919 	    case SUBREG:
12920 	      if (!REG_P (SUBREG_REG (op)))
12921 		return 0;
12922 	      /* FALLTHRU */
12923 
12924 	    case REG:
12925 	      if (!base)
12926 		base = op;
12927 	      else if (!index)
12928 		index = op;
12929 	      else
12930 		return 0;
12931 	      break;
12932 
12933 	    case CONST:
12934 	    case CONST_INT:
12935 	    case SYMBOL_REF:
12936 	    case LABEL_REF:
12937 	      if (disp)
12938 		return 0;
12939 	      disp = op;
12940 	      break;
12941 
12942 	    default:
12943 	      return 0;
12944 	    }
12945 	}
12946     }
12947   else if (GET_CODE (addr) == MULT)
12948     {
12949       index = XEXP (addr, 0);		/* index*scale */
12950       scale_rtx = XEXP (addr, 1);
12951     }
12952   else if (GET_CODE (addr) == ASHIFT)
12953     {
12954       /* We're called for lea too, which implements ashift on occasion.  */
12955       index = XEXP (addr, 0);
12956       tmp = XEXP (addr, 1);
12957       if (!CONST_INT_P (tmp))
12958 	return 0;
12959       scale = INTVAL (tmp);
12960       if ((unsigned HOST_WIDE_INT) scale > 3)
12961 	return 0;
12962       scale = 1 << scale;
12963       retval = -1;
12964     }
12965   else
12966     disp = addr;			/* displacement */
12967 
12968   if (index)
12969     {
12970       if (REG_P (index))
12971 	;
12972       else if (GET_CODE (index) == SUBREG
12973 	       && REG_P (SUBREG_REG (index)))
12974 	;
12975       else
12976 	return 0;
12977     }
12978 
12979   /* Extract the integral value of scale.  */
12980   if (scale_rtx)
12981     {
12982       if (!CONST_INT_P (scale_rtx))
12983 	return 0;
12984       scale = INTVAL (scale_rtx);
12985     }
12986 
12987   base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12988   index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12989 
12990   /* Avoid useless 0 displacement.  */
12991   if (disp == const0_rtx && (base || index))
12992     disp = NULL_RTX;
12993 
12994   /* Allow the arg pointer and stack pointer as the index if there is no scaling.  */
12995   if (base_reg && index_reg && scale == 1
12996       && (index_reg == arg_pointer_rtx
12997 	  || index_reg == frame_pointer_rtx
12998 	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12999     {
13000       std::swap (base, index);
13001       std::swap (base_reg, index_reg);
13002     }
13003 
13004   /* Special case: %ebp cannot be encoded as a base without a displacement.
13005      Similarly %r13.  */
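  /* (In ModR/M encoding, mod 00 with base 101 means disp32 and no
     base, so (%ebp) has to be emitted as 0(%ebp); %r13 hits the same
     encoding through REX.)  */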
13006   if (!disp
13007       && base_reg
13008       && (base_reg == hard_frame_pointer_rtx
13009 	  || base_reg == frame_pointer_rtx
13010 	  || base_reg == arg_pointer_rtx
13011 	  || (REG_P (base_reg)
13012 	      && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
13013 		  || REGNO (base_reg) == R13_REG))))
13014     disp = const0_rtx;
13015 
13016   /* Special case: on K6, [%esi] causes the instruction to be vector
13017      decoded.  Avoid this by transforming to [%esi+0].
13018      Reload calls address legitimization without cfun defined, so we
13019      need to test cfun for being non-NULL.  */
13020   if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
13021       && base_reg && !index_reg && !disp
13022       && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
13023     disp = const0_rtx;
13024 
13025   /* Special case: encode reg+reg instead of reg*2.  */
13026   if (!base && index && scale == 2)
13027     base = index, base_reg = index_reg, scale = 1;
13028 
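  /* (For example, (%eax,%eax) encodes more compactly than (,%eax,2),
     which would need a 32-bit zero displacement.)  */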
13029   /* Special case: scaling cannot be encoded without base or displacement.  */
13030   if (!base && !disp && index && scale != 1)
13031     disp = const0_rtx;
13032 
13033   out->base = base;
13034   out->index = index;
13035   out->disp = disp;
13036   out->scale = scale;
13037   out->seg = seg;
13038 
13039   return retval;
13040 }
13041 
13042 /* Return cost of the memory address x.
13043    For i386, it is better to use a complex address than let gcc copy
13044    the address into a reg and make a new pseudo.  But not if the address
13045    requires two regs - that would mean more pseudos with longer
13046    lifetimes.  */
13047 static int
13048 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
13049 {
13050   struct ix86_address parts;
13051   int cost = 1;
13052   int ok = ix86_decompose_address (x, &parts);
13053 
13054   gcc_assert (ok);
13055 
13056   if (parts.base && GET_CODE (parts.base) == SUBREG)
13057     parts.base = SUBREG_REG (parts.base);
13058   if (parts.index && GET_CODE (parts.index) == SUBREG)
13059     parts.index = SUBREG_REG (parts.index);
13060 
13061   /* Attempt to minimize number of registers in the address by increasing
13062      address cost for each used register.  We don't increase address cost
13063      for "pic_offset_table_rtx".  When a memory operand using
13064      "pic_offset_table_rtx" is not invariant itself, it most likely
13065      means that the base or index is not invariant.  Therefore only
13066      "pic_offset_table_rtx" could be hoisted out, which is unprofitable for x86.  */
13067   if (parts.base
13068       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
13069       && (current_pass->type == GIMPLE_PASS
13070 	  || !pic_offset_table_rtx
13071 	  || !REG_P (parts.base)
13072 	  || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
13073     cost++;
13074 
13075   if (parts.index
13076       && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
13077       && (current_pass->type == GIMPLE_PASS
13078 	  || !pic_offset_table_rtx
13079 	  || !REG_P (parts.index)
13080 	  || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
13081     cost++;
13082 
13083   /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
13084      since its predecode logic can't detect the length of instructions
13085      and it degenerates to vector decoding.  Increase cost of such
13086      addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
13087      to split such addresses or even refuse such addresses at all.
13088 
13089      Following addressing modes are affected:
13090       [base+scale*index]
13091       [scale*index+disp]
13092       [base+index]
13093 
13094      The first and last case may be avoidable by explicitly coding the
13095      zero into the memory address, but I don't have an AMD-K6 machine
13096      handy to check this theory.  */
13097 
13098   if (TARGET_K6
13099       && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
13100 	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
13101 	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
13102     cost += 10;
13103 
13104   return cost;
13105 }
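
/* A worked example of the cost above: before register allocation an
   address such as (plus (mult (reg 101) (const_int 2)) (reg 100))
   costs 1 + 1 + 1 = 3, one for each pseudo used; once hard registers
   have been assigned, the same address costs just 1, and on K6 the
   missing displacement would add another 10.  */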
13106 
13107 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
13108    this is used to form addresses to local data when -fPIC is in
13109    use.  */
13110 
13111 static bool
13112 darwin_local_data_pic (rtx disp)
13113 {
13114   return (GET_CODE (disp) == UNSPEC
13115 	  && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
13116 }
13117 
13118 /* Determine if a given RTX is a valid constant.  We already know this
13119    satisfies CONSTANT_P.  */
13120 
13121 static bool
13122 ix86_legitimate_constant_p (machine_mode, rtx x)
13123 {
13124   /* Pointer bounds constants are not valid.  */
13125   if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
13126     return false;
13127 
13128   switch (GET_CODE (x))
13129     {
13130     case CONST:
13131       x = XEXP (x, 0);
13132 
13133       if (GET_CODE (x) == PLUS)
13134 	{
13135 	  if (!CONST_INT_P (XEXP (x, 1)))
13136 	    return false;
13137 	  x = XEXP (x, 0);
13138 	}
13139 
13140       if (TARGET_MACHO && darwin_local_data_pic (x))
13141 	return true;
13142 
13143       /* Only some unspecs are valid as "constants".  */
13144       if (GET_CODE (x) == UNSPEC)
13145 	switch (XINT (x, 1))
13146 	  {
13147 	  case UNSPEC_GOT:
13148 	  case UNSPEC_GOTOFF:
13149 	  case UNSPEC_PLTOFF:
13150 	    return TARGET_64BIT;
13151 	  case UNSPEC_TPOFF:
13152 	  case UNSPEC_NTPOFF:
13153 	    x = XVECEXP (x, 0, 0);
13154 	    return (GET_CODE (x) == SYMBOL_REF
13155 		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13156 	  case UNSPEC_DTPOFF:
13157 	    x = XVECEXP (x, 0, 0);
13158 	    return (GET_CODE (x) == SYMBOL_REF
13159 		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
13160 	  default:
13161 	    return false;
13162 	  }
13163 
13164       /* We must have drilled down to a symbol.  */
13165       if (GET_CODE (x) == LABEL_REF)
13166 	return true;
13167       if (GET_CODE (x) != SYMBOL_REF)
13168 	return false;
13169       /* FALLTHRU */
13170 
13171     case SYMBOL_REF:
13172       /* TLS symbols are never valid.  */
13173       if (SYMBOL_REF_TLS_MODEL (x))
13174 	return false;
13175 
13176       /* DLLIMPORT symbols are never valid.  */
13177       if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13178 	  && SYMBOL_REF_DLLIMPORT_P (x))
13179 	return false;
13180 
13181 #if TARGET_MACHO
13182       /* mdynamic-no-pic */
13183       if (MACHO_DYNAMIC_NO_PIC_P)
13184 	return machopic_symbol_defined_p (x);
13185 #endif
13186       break;
13187 
13188     case CONST_DOUBLE:
13189       if (GET_MODE (x) == TImode
13190 	  && x != CONST0_RTX (TImode)
13191           && !TARGET_64BIT)
13192 	return false;
13193       break;
13194 
13195     case CONST_VECTOR:
13196       if (!standard_sse_constant_p (x))
13197 	return false;
13198 
13199     default:
13200       break;
13201     }
13202 
13203   /* Otherwise we handle everything else in the move patterns.  */
13204   return true;
13205 }
13206 
13207 /* Determine if it's legal to put X into the constant pool.  This
13208    is not possible for the address of thread-local symbols, which
13209    is checked above.  */
13210 
13211 static bool
13212 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
13213 {
13214   /* We can always put integral constants and vectors in memory.  */
13215   switch (GET_CODE (x))
13216     {
13217     case CONST_INT:
13218     case CONST_DOUBLE:
13219     case CONST_VECTOR:
13220       return false;
13221 
13222     default:
13223       break;
13224     }
13225   return !ix86_legitimate_constant_p (mode, x);
13226 }
13227 
13228 /* Return true if the symbol is marked as dllimport or as a stub
13229    variable, otherwise false.  */
13230 
13231 static bool
13232 is_imported_p (rtx x)
13233 {
13234   if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
13235       || GET_CODE (x) != SYMBOL_REF)
13236     return false;
13237 
13238   return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
13239 }
13240 
13241 
13242 /* Nonzero if the constant value X is a legitimate general operand
13243    when generating PIC code.  It is given that flag_pic is on and
13244    that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
13245 
13246 bool
13247 legitimate_pic_operand_p (rtx x)
13248 {
13249   rtx inner;
13250 
13251   switch (GET_CODE (x))
13252     {
13253     case CONST:
13254       inner = XEXP (x, 0);
13255       if (GET_CODE (inner) == PLUS
13256 	  && CONST_INT_P (XEXP (inner, 1)))
13257 	inner = XEXP (inner, 0);
13258 
13259       /* Only some unspecs are valid as "constants".  */
13260       if (GET_CODE (inner) == UNSPEC)
13261 	switch (XINT (inner, 1))
13262 	  {
13263 	  case UNSPEC_GOT:
13264 	  case UNSPEC_GOTOFF:
13265 	  case UNSPEC_PLTOFF:
13266 	    return TARGET_64BIT;
13267 	  case UNSPEC_TPOFF:
13268 	    x = XVECEXP (inner, 0, 0);
13269 	    return (GET_CODE (x) == SYMBOL_REF
13270 		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13271 	  case UNSPEC_MACHOPIC_OFFSET:
13272 	    return legitimate_pic_address_disp_p (x);
13273 	  default:
13274 	    return false;
13275 	  }
13276       /* FALLTHRU */
13277 
13278     case SYMBOL_REF:
13279     case LABEL_REF:
13280       return legitimate_pic_address_disp_p (x);
13281 
13282     default:
13283       return true;
13284     }
13285 }
13286 
13287 /* Determine if a given CONST RTX is a valid memory displacement
13288    in PIC mode.  */
13289 
13290 bool
13291 legitimate_pic_address_disp_p (rtx disp)
13292 {
13293   bool saw_plus;
13294 
13295   /* In 64bit mode we can allow direct addresses of symbols and labels
13296      when they are not dynamic symbols.  */
13297   if (TARGET_64BIT)
13298     {
13299       rtx op0 = disp, op1;
13300 
13301       switch (GET_CODE (disp))
13302 	{
13303 	case LABEL_REF:
13304 	  return true;
13305 
13306 	case CONST:
13307 	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
13308 	    break;
13309 	  op0 = XEXP (XEXP (disp, 0), 0);
13310 	  op1 = XEXP (XEXP (disp, 0), 1);
13311 	  if (!CONST_INT_P (op1)
13312 	      || INTVAL (op1) >= 16*1024*1024
13313 	      || INTVAL (op1) < -16*1024*1024)
13314             break;
13315 	  if (GET_CODE (op0) == LABEL_REF)
13316 	    return true;
13317 	  if (GET_CODE (op0) == CONST
13318 	      && GET_CODE (XEXP (op0, 0)) == UNSPEC
13319 	      && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13320 	    return true;
13321 	  if (GET_CODE (op0) == UNSPEC
13322 	      && XINT (op0, 1) == UNSPEC_PCREL)
13323 	    return true;
13324 	  if (GET_CODE (op0) != SYMBOL_REF)
13325 	    break;
13326 	  /* FALLTHRU */
13327 
13328 	case SYMBOL_REF:
13329 	  /* TLS references should always be enclosed in UNSPEC.
13330 	     A dllimported symbol always needs to be resolved.  */
13331 	  if (SYMBOL_REF_TLS_MODEL (op0)
13332 	      || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13333 	    return false;
13334 
13335 	  if (TARGET_PECOFF)
13336 	    {
13337 	      if (is_imported_p (op0))
13338 		return true;
13339 
13340 	      if (SYMBOL_REF_FAR_ADDR_P (op0)
13341 		  || !SYMBOL_REF_LOCAL_P (op0))
13342 		break;
13343 
13344 	      /* Function symbols need to be resolved only for the
13345 	         large model.  For the small model we don't need to
13346 	         resolve anything here.  */
13348 	      if ((ix86_cmodel != CM_LARGE_PIC
13349 	           && SYMBOL_REF_FUNCTION_P (op0))
13350 		  || ix86_cmodel == CM_SMALL_PIC)
13351 		return true;
13352 	      /* Non-external symbols don't need to be resolved for
13353 	         the large and medium models.  */
13354 	      if ((ix86_cmodel == CM_LARGE_PIC
13355 		   || ix86_cmodel == CM_MEDIUM_PIC)
13356 		  && !SYMBOL_REF_EXTERNAL_P (op0))
13357 		return true;
13358 	    }
13359 	  else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13360 		   && (SYMBOL_REF_LOCAL_P (op0)
13361 		       || (HAVE_LD_PIE_COPYRELOC
13362 			   && flag_pie
13363 			   && !SYMBOL_REF_WEAK (op0)
13364 			   && !SYMBOL_REF_FUNCTION_P (op0)))
13365 		   && ix86_cmodel != CM_LARGE_PIC)
13366 	    return true;
13367 	  break;
13368 
13369 	default:
13370 	  break;
13371 	}
13372     }
13373   if (GET_CODE (disp) != CONST)
13374     return false;
13375   disp = XEXP (disp, 0);
13376 
13377   if (TARGET_64BIT)
13378     {
13379       /* It is unsafe to allow PLUS expressions.  This limits the allowed
13380          distance of GOT table references.  We should not need these anyway.  */
13381       if (GET_CODE (disp) != UNSPEC
13382 	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
13383 	      && XINT (disp, 1) != UNSPEC_GOTOFF
13384 	      && XINT (disp, 1) != UNSPEC_PCREL
13385 	      && XINT (disp, 1) != UNSPEC_PLTOFF))
13386 	return false;
13387 
13388       if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13389 	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13390 	return false;
13391       return true;
13392     }
13393 
13394   saw_plus = false;
13395   if (GET_CODE (disp) == PLUS)
13396     {
13397       if (!CONST_INT_P (XEXP (disp, 1)))
13398 	return false;
13399       disp = XEXP (disp, 0);
13400       saw_plus = true;
13401     }
13402 
13403   if (TARGET_MACHO && darwin_local_data_pic (disp))
13404     return true;
13405 
13406   if (GET_CODE (disp) != UNSPEC)
13407     return false;
13408 
13409   switch (XINT (disp, 1))
13410     {
13411     case UNSPEC_GOT:
13412       if (saw_plus)
13413 	return false;
13414       /* We need to check for both symbols and labels because VxWorks loads
13415 	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
13416 	 details.  */
13417       return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13418 	      || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13419     case UNSPEC_GOTOFF:
13420       /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13421 	 While the ABI also specifies a 32bit relocation, we don't produce
13422 	 it in the small PIC model at all.  */
13423       if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13424 	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13425 	  && !TARGET_64BIT)
13426         return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13427       return false;
13428     case UNSPEC_GOTTPOFF:
13429     case UNSPEC_GOTNTPOFF:
13430     case UNSPEC_INDNTPOFF:
13431       if (saw_plus)
13432 	return false;
13433       disp = XVECEXP (disp, 0, 0);
13434       return (GET_CODE (disp) == SYMBOL_REF
13435 	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13436     case UNSPEC_NTPOFF:
13437       disp = XVECEXP (disp, 0, 0);
13438       return (GET_CODE (disp) == SYMBOL_REF
13439 	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13440     case UNSPEC_DTPOFF:
13441       disp = XVECEXP (disp, 0, 0);
13442       return (GET_CODE (disp) == SYMBOL_REF
13443 	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13444     }
13445 
13446   return false;
13447 }
13448 
13449 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns true if
13450    (part of) the address X was reloaded, in which case the calling
13451    macro should goto WIN, and false if it should not.  */
13453 
13454 bool
13455 ix86_legitimize_reload_address (rtx x, machine_mode, int opnum, int type,
13456 			       	int)
13457 {
13458   /* Reload can generate:
13459 
13460      (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
13461 		       (reg:DI 97))
13462 	      (reg:DI 2 cx))
13463 
13464      This RTX is rejected from ix86_legitimate_address_p due to
13465      non-strictness of base register 97.  Following this rejection,
13466      reload pushes all three components into separate registers,
13467      creating invalid memory address RTX.
13468 
13469      Following code reloads only the invalid part of the
13470      memory address RTX.  */
13471 
13472   if (GET_CODE (x) == PLUS
13473       && REG_P (XEXP (x, 1))
13474       && GET_CODE (XEXP (x, 0)) == PLUS
13475       && REG_P (XEXP (XEXP (x, 0), 1)))
13476     {
13477       rtx base, index;
13478       bool something_reloaded = false;
13479 
13480       base = XEXP (XEXP (x, 0), 1);
13481       if (!REG_OK_FOR_BASE_STRICT_P (base))
13482 	{
13483 	  push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
13484 		       BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13485 		       opnum, (enum reload_type) type);
13486 	  something_reloaded = true;
13487 	}
13488 
13489       index = XEXP (x, 1);
13490       if (!REG_OK_FOR_INDEX_STRICT_P (index))
13491 	{
13492 	  push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
13493 		       INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13494 		       opnum, (enum reload_type) type);
13495 	  something_reloaded = true;
13496 	}
13497 
13498       gcc_assert (something_reloaded);
13499       return true;
13500     }
13501 
13502   return false;
13503 }
13504 
13505 /* Determine if op is a suitable RTX for an address register.
13506    Return naked register if a register or a register subreg is
13507    found, otherwise return NULL_RTX.  */
13508 
13509 static rtx
13510 ix86_validate_address_register (rtx op)
13511 {
13512   machine_mode mode = GET_MODE (op);
13513 
13514   /* Only SImode or DImode registers can form the address.  */
13515   if (mode != SImode && mode != DImode)
13516     return NULL_RTX;
13517 
13518   if (REG_P (op))
13519     return op;
13520   else if (GET_CODE (op) == SUBREG)
13521     {
13522       rtx reg = SUBREG_REG (op);
13523 
13524       if (!REG_P (reg))
13525 	return NULL_RTX;
13526 
13527       mode = GET_MODE (reg);
13528 
13529       /* Don't allow SUBREGs that span more than a word.  It can
13530 	 lead to spill failures when the register is one word out
13531 	 of a two word structure.  */
13532       if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13533 	return NULL_RTX;
13534 
13535       /* Allow only SUBREGs of non-eliminable hard registers.  */
13536       if (register_no_elim_operand (reg, mode))
13537 	return reg;
13538     }
13539 
13540   /* Op is not a register.  */
13541   return NULL_RTX;
13542 }
13543 
13544 /* Recognizes RTL expressions that are valid memory addresses for an
13545    instruction.  The MODE argument is the machine mode for the MEM
13546    expression that wants to use this address.
13547 
13548    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
13549    convert common non-canonical forms to canonical form so that they will
13550    be recognized.  */
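
/* For reference, the canonical shape accepted here is at most
   base + index*scale + disp, e.g.

       (plus:DI (plus:DI (mult:DI (reg:DI <index>) (const_int 4))
			 (reg:DI <base>))
		(const_int <disp>))

   with scale limited to 1, 2, 4 or 8 and an optional segment
   override.  */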
13551 
13552 static bool
13553 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13554 {
13555   struct ix86_address parts;
13556   rtx base, index, disp;
13557   HOST_WIDE_INT scale;
13558   enum ix86_address_seg seg;
13559 
13560   if (ix86_decompose_address (addr, &parts) <= 0)
13561     /* Decomposition failed.  */
13562     return false;
13563 
13564   base = parts.base;
13565   index = parts.index;
13566   disp = parts.disp;
13567   scale = parts.scale;
13568   seg = parts.seg;
13569 
13570   /* Validate base register.  */
13571   if (base)
13572     {
13573       rtx reg = ix86_validate_address_register (base);
13574 
13575       if (reg == NULL_RTX)
13576 	return false;
13577 
13578       if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13579 	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13580 	/* Base is not valid.  */
13581 	return false;
13582     }
13583 
13584   /* Validate index register.  */
13585   if (index)
13586     {
13587       rtx reg = ix86_validate_address_register (index);
13588 
13589       if (reg == NULL_RTX)
13590 	return false;
13591 
13592       if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13593 	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13594 	/* Index is not valid.  */
13595 	return false;
13596     }
13597 
13598   /* Index and base should have the same mode.  */
13599   if (base && index
13600       && GET_MODE (base) != GET_MODE (index))
13601     return false;
13602 
13603   /* Address override works only on the (%reg) part of %fs:(%reg).  */
13604   if (seg != SEG_DEFAULT
13605       && ((base && GET_MODE (base) != word_mode)
13606 	  || (index && GET_MODE (index) != word_mode)))
13607     return false;
13608 
13609   /* Validate scale factor.  */
13610   if (scale != 1)
13611     {
13612       if (!index)
13613 	/* Scale without index.  */
13614 	return false;
13615 
13616       if (scale != 2 && scale != 4 && scale != 8)
13617 	/* Scale is not a valid multiplier.  */
13618 	return false;
13619     }
13620 
13621   /* Validate displacement.  */
13622   if (disp)
13623     {
13624       if (GET_CODE (disp) == CONST
13625 	  && GET_CODE (XEXP (disp, 0)) == UNSPEC
13626 	  && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13627 	switch (XINT (XEXP (disp, 0), 1))
13628 	  {
13629 	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13630 	     used.  While the ABI also specifies 32bit relocations, we don't
13631 	     produce them at all and use IP-relative addressing instead.  */
13632 	  case UNSPEC_GOT:
13633 	  case UNSPEC_GOTOFF:
13634 	    gcc_assert (flag_pic);
13635 	    if (!TARGET_64BIT)
13636 	      goto is_legitimate_pic;
13637 
13638 	    /* 64bit address unspec.  */
13639 	    return false;
13640 
13641 	  case UNSPEC_GOTPCREL:
13642 	  case UNSPEC_PCREL:
13643 	    gcc_assert (flag_pic);
13644 	    goto is_legitimate_pic;
13645 
13646 	  case UNSPEC_GOTTPOFF:
13647 	  case UNSPEC_GOTNTPOFF:
13648 	  case UNSPEC_INDNTPOFF:
13649 	  case UNSPEC_NTPOFF:
13650 	  case UNSPEC_DTPOFF:
13651 	    break;
13652 
13653 	  case UNSPEC_STACK_CHECK:
13654 	    gcc_assert (flag_split_stack);
13655 	    break;
13656 
13657 	  default:
13658 	    /* Invalid address unspec.  */
13659 	    return false;
13660 	  }
13661 
13662       else if (SYMBOLIC_CONST (disp)
13663 	       && (flag_pic
13664 		   || (TARGET_MACHO
13665 #if TARGET_MACHO
13666 		       && MACHOPIC_INDIRECT
13667 		       && !machopic_operand_p (disp)
13668 #endif
13669 	       )))
13670 	{
13671 
13672 	is_legitimate_pic:
13673 	  if (TARGET_64BIT && (index || base))
13674 	    {
13675 	      /* foo@dtpoff(%rX) is ok.  */
13676 	      if (GET_CODE (disp) != CONST
13677 		  || GET_CODE (XEXP (disp, 0)) != PLUS
13678 		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13679 		  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13680 		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13681 		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13682 		/* Non-constant pic memory reference.  */
13683 		return false;
13684 	    }
13685 	  else if ((!TARGET_MACHO || flag_pic)
13686 		    && ! legitimate_pic_address_disp_p (disp))
13687 	    /* Displacement is an invalid pic construct.  */
13688 	    return false;
13689 #if TARGET_MACHO
13690 	  else if (MACHO_DYNAMIC_NO_PIC_P
13691 		   && !ix86_legitimate_constant_p (Pmode, disp))
13692 	    /* Displacement must be referenced via non_lazy_pointer.  */
13693 	    return false;
13694 #endif
13695 
13696 	  /* This code used to verify that a symbolic pic displacement
13697 	     includes the pic_offset_table_rtx register.
13698 
13699 	     While this is a good idea, unfortunately these constructs may
13700 	     be created by the "adds using lea" optimization for incorrect
13701 	     code like:
13702 
13703 	     int a;
13704 	     int foo(int i)
13705 	       {
13706 	         return *(&a+i);
13707 	       }
13708 
13709 	     This code is nonsensical, but results in addressing the
13710 	     GOT table with a pic_offset_table_rtx base.  We can't
13711 	     just refuse it easily, since it gets matched by the
13712 	     "addsi3" pattern, which later gets split to lea when the
13713 	     output register differs from the input.  While this
13714 	     could be handled by a separate addsi pattern for this case
13715 	     that never results in lea, disabling this test seems to be
13716 	     the easier and correct fix for the crash.  */
13717 	}
13718       else if (GET_CODE (disp) != LABEL_REF
13719 	       && !CONST_INT_P (disp)
13720 	       && (GET_CODE (disp) != CONST
13721 		   || !ix86_legitimate_constant_p (Pmode, disp))
13722 	       && (GET_CODE (disp) != SYMBOL_REF
13723 		   || !ix86_legitimate_constant_p (Pmode, disp)))
13724 	/* Displacement is not constant.  */
13725 	return false;
13726       else if (TARGET_64BIT
13727 	       && !x86_64_immediate_operand (disp, VOIDmode))
13728 	/* Displacement is out of range.  */
13729 	return false;
13730       /* In x32 mode, constant addresses are sign-extended to 64 bits, so
13731 	 we have to reject addresses from 0x80000000 to 0xffffffff.  */
13732       else if (TARGET_X32 && !(index || base)
13733 	       && CONST_INT_P (disp)
13734 	       && val_signbit_known_set_p (SImode, INTVAL (disp)))
13735 	return false;
13736     }
13737 
13738   /* Everything looks valid.  */
13739   return true;
13740 }
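
/* Thus a legitimate address decomposes as

	base + index * scale + disp

   with BASE and INDEX valid address registers of the same mode, SCALE
   one of 1, 2, 4 or 8 (and only together with an index), and DISP a
   displacement that the relocation checks above can accept.  */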
13741 
13742 /* Determine if a given RTX is a valid constant address.  */
13743 
13744 bool
13745 constant_address_p (rtx x)
13746 {
13747   return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13748 }
13749 
13750 /* Return a unique alias set for the GOT.  */
13751 
13752 static alias_set_type
13753 ix86_GOT_alias_set (void)
13754 {
13755   static alias_set_type set = -1;
13756   if (set == -1)
13757     set = new_alias_set ();
13758   return set;
13759 }
13760 
13761 /* Set regs_ever_live for PIC base address register
13762    to true if required.  */
13763 static void
13764 set_pic_reg_ever_live ()
13765 {
13766   if (reload_in_progress)
13767     df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
13768 }
13769 
13770 /* Return a legitimate reference for ORIG (an address) using the
13771    register REG.  If REG is 0, a new pseudo is generated.
13772 
13773    There are two types of references that must be handled:
13774 
13775    1. Global data references must load the address from the GOT, via
13776       the PIC reg.  An insn is emitted to do this load, and the reg is
13777       returned.
13778 
13779    2. Static data references, constant pool addresses, and code labels
13780       compute the address as an offset from the GOT, whose base is in
13781       the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
13782       differentiate them from global data objects.  The returned
13783       address is the PIC reg + an unspec constant.
13784 
13785    TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13786    reg also appears in the address.  */
13787 
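/* For instance, on ia32 with -fpic (assuming %ebx holds the PIC base)
   case 1 typically becomes a load through the GOT

	movl	sym@GOT(%ebx), %eax

   while case 2 folds into a displacement from the PIC base

	leal	sym@GOTOFF(%ebx), %eax

   Illustrative only; the exact sequences emitted below vary with the
   target, code model and TLS model.  */
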
13788 static rtx
13789 legitimize_pic_address (rtx orig, rtx reg)
13790 {
13791   rtx addr = orig;
13792   rtx new_rtx = orig;
13793 
13794 #if TARGET_MACHO
13795   if (TARGET_MACHO && !TARGET_64BIT)
13796     {
13797       if (reg == 0)
13798 	reg = gen_reg_rtx (Pmode);
13799       /* Use the generic Mach-O PIC machinery.  */
13800       return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13801     }
13802 #endif
13803 
13804   if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13805     {
13806       rtx tmp = legitimize_pe_coff_symbol (addr, true);
13807       if (tmp)
13808         return tmp;
13809     }
13810 
13811   if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13812     new_rtx = addr;
13813   else if (TARGET_64BIT && !TARGET_PECOFF
13814 	   && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13815     {
13816       rtx tmpreg;
13817       /* This symbol may be referenced via a displacement from the PIC
13818 	 base address (@GOTOFF).  */
13819 
13820       set_pic_reg_ever_live ();
13821       if (GET_CODE (addr) == CONST)
13822 	addr = XEXP (addr, 0);
13823       if (GET_CODE (addr) == PLUS)
13824 	{
13825 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13826 				    UNSPEC_GOTOFF);
13827 	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13828 	}
13829       else
13830 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13831       new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13832       if (!reg)
13833         tmpreg = gen_reg_rtx (Pmode);
13834       else
13835 	tmpreg = reg;
13836       emit_move_insn (tmpreg, new_rtx);
13837 
13838       if (reg != 0)
13839 	{
13840 	  new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13841 					 tmpreg, 1, OPTAB_DIRECT);
13842 	  new_rtx = reg;
13843 	}
13844       else
13845         new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13846     }
13847   else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13848     {
13849       /* This symbol may be referenced via a displacement from the PIC
13850 	 base address (@GOTOFF).  */
13851 
13852       set_pic_reg_ever_live ();
13853       if (GET_CODE (addr) == CONST)
13854 	addr = XEXP (addr, 0);
13855       if (GET_CODE (addr) == PLUS)
13856 	{
13857 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13858 				    UNSPEC_GOTOFF);
13859 	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13860 	}
13861       else
13862 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13863       new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13864       new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13865 
13866       if (reg != 0)
13867 	{
13868 	  emit_move_insn (reg, new_rtx);
13869 	  new_rtx = reg;
13870 	}
13871     }
13872   else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13873 	   /* We can't use @GOTOFF for text labels on VxWorks;
13874 	      see gotoff_operand.  */
13875 	   || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13876     {
13877       rtx tmp = legitimize_pe_coff_symbol (addr, true);
13878       if (tmp)
13879         return tmp;
13880 
13881       /* For x64 PE-COFF there is no GOT table.  So we use address
13882          directly.  */
13883       if (TARGET_64BIT && TARGET_PECOFF)
13884 	{
13885 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13886 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13887 
13888 	  if (reg == 0)
13889 	    reg = gen_reg_rtx (Pmode);
13890 	  emit_move_insn (reg, new_rtx);
13891 	  new_rtx = reg;
13892 	}
13893       else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13894 	{
13895 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13896 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13897 	  new_rtx = gen_const_mem (Pmode, new_rtx);
13898 	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13899 
13900 	  if (reg == 0)
13901 	    reg = gen_reg_rtx (Pmode);
13902 	  /* Use gen_movsi directly, otherwise the address is loaded
13903 	     into a register for CSE.  We don't want to CSE these addresses;
13904 	     instead we CSE addresses from the GOT table, so skip this.  */
13905 	  emit_insn (gen_movsi (reg, new_rtx));
13906 	  new_rtx = reg;
13907 	}
13908       else
13909 	{
13910 	  /* This symbol must be referenced via a load from the
13911 	     Global Offset Table (@GOT).  */
13912 
13913 	  set_pic_reg_ever_live ();
13914 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13915 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13916 	  if (TARGET_64BIT)
13917 	    new_rtx = force_reg (Pmode, new_rtx);
13918 	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13919 	  new_rtx = gen_const_mem (Pmode, new_rtx);
13920 	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13921 
13922 	  if (reg == 0)
13923 	    reg = gen_reg_rtx (Pmode);
13924 	  emit_move_insn (reg, new_rtx);
13925 	  new_rtx = reg;
13926 	}
13927     }
13928   else
13929     {
13930       if (CONST_INT_P (addr)
13931 	  && !x86_64_immediate_operand (addr, VOIDmode))
13932 	{
13933 	  if (reg)
13934 	    {
13935 	      emit_move_insn (reg, addr);
13936 	      new_rtx = reg;
13937 	    }
13938 	  else
13939 	    new_rtx = force_reg (Pmode, addr);
13940 	}
13941       else if (GET_CODE (addr) == CONST)
13942 	{
13943 	  addr = XEXP (addr, 0);
13944 
13945 	  /* We must match stuff we generate before.  Assume the only
13946 	     unspecs that can get here are ours.  Not that we could do
13947 	     anything with them anyway....  */
13948 	  if (GET_CODE (addr) == UNSPEC
13949 	      || (GET_CODE (addr) == PLUS
13950 		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13951 	    return orig;
13952 	  gcc_assert (GET_CODE (addr) == PLUS);
13953 	}
13954       if (GET_CODE (addr) == PLUS)
13955 	{
13956 	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13957 
13958 	  /* Check first to see if this is a constant offset from a @GOTOFF
13959 	     symbol reference.  */
13960 	  if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13961 	      && CONST_INT_P (op1))
13962 	    {
13963 	      if (!TARGET_64BIT)
13964 		{
13965 		  set_pic_reg_ever_live ();
13966 		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13967 					    UNSPEC_GOTOFF);
13968 		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13969 		  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13970 		  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13971 
13972 		  if (reg != 0)
13973 		    {
13974 		      emit_move_insn (reg, new_rtx);
13975 		      new_rtx = reg;
13976 		    }
13977 		}
13978 	      else
13979 		{
13980 		  if (INTVAL (op1) < -16*1024*1024
13981 		      || INTVAL (op1) >= 16*1024*1024)
13982 		    {
13983 		      if (!x86_64_immediate_operand (op1, Pmode))
13984 			op1 = force_reg (Pmode, op1);
13985 		      new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13986 		    }
13987 		}
13988 	    }
13989 	  else
13990 	    {
13991 	      rtx base = legitimize_pic_address (op0, reg);
13992 	      machine_mode mode = GET_MODE (base);
13993 	      new_rtx
13994 	        = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13995 
13996 	      if (CONST_INT_P (new_rtx))
13997 		{
13998 		  if (INTVAL (new_rtx) < -16*1024*1024
13999 		      || INTVAL (new_rtx) >= 16*1024*1024)
14000 		    {
14001 		      if (!x86_64_immediate_operand (new_rtx, mode))
14002 			new_rtx = force_reg (mode, new_rtx);
14003 		      new_rtx
14004 		        = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
14005 		    }
14006 		  else
14007 		    new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
14008 		}
14009 	      else
14010 		{
14011 		  /* For %rip addressing, we have to use just disp32, with
14012 		     neither base nor index.  */
14013 		  if (TARGET_64BIT
14014 		      && (GET_CODE (base) == SYMBOL_REF
14015 			  || GET_CODE (base) == LABEL_REF))
14016 		    base = force_reg (mode, base);
14017 		  if (GET_CODE (new_rtx) == PLUS
14018 		      && CONSTANT_P (XEXP (new_rtx, 1)))
14019 		    {
14020 		      base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
14021 		      new_rtx = XEXP (new_rtx, 1);
14022 		    }
14023 		  new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
14024 		}
14025 	    }
14026 	}
14027     }
14028   return new_rtx;
14029 }
14030 
14031 /* Load the thread pointer.  If TO_REG is true, force it into a register.  */
14032 
14033 static rtx
14034 get_thread_pointer (machine_mode tp_mode, bool to_reg)
14035 {
14036   rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
14037 
14038   if (GET_MODE (tp) != tp_mode)
14039     {
14040       gcc_assert (GET_MODE (tp) == SImode);
14041       gcc_assert (tp_mode == DImode);
14042 
14043       tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
14044     }
14045 
14046   if (to_reg)
14047     tp = copy_to_mode_reg (tp_mode, tp);
14048 
14049   return tp;
14050 }
14051 
14052 /* Construct the SYMBOL_REF for the tls_get_addr function.  */
14053 
14054 static GTY(()) rtx ix86_tls_symbol;
14055 
14056 static rtx
14057 ix86_tls_get_addr (void)
14058 {
14059   if (!ix86_tls_symbol)
14060     {
14061       const char *sym
14062 	= ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
14063 	   ? "___tls_get_addr" : "__tls_get_addr");
14064 
14065       ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
14066     }
14067 
14068   if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
14069     {
14070       rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
14071 				   UNSPEC_PLTOFF);
14072       return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
14073 			   gen_rtx_CONST (Pmode, unspec));
14074     }
14075 
14076   return ix86_tls_symbol;
14077 }
14078 
14079 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
14080 
14081 static GTY(()) rtx ix86_tls_module_base_symbol;
14082 
14083 rtx
14084 ix86_tls_module_base (void)
14085 {
14086   if (!ix86_tls_module_base_symbol)
14087     {
14088       ix86_tls_module_base_symbol
14089 	= gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
14090 
14091       SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
14092 	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
14093     }
14094 
14095   return ix86_tls_module_base_symbol;
14096 }
14097 
14098 /* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
14099    false if we expect this to be used for a memory address and true if
14100    we expect to load the address into a register.  */
14101 
14102 static rtx
14103 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
14104 {
14105   rtx dest, base, off;
14106   rtx pic = NULL_RTX, tp = NULL_RTX;
14107   machine_mode tp_mode = Pmode;
14108   int type;
14109 
14110   /* Fall back to the global dynamic model if the tool chain cannot
14111      support local dynamic.  */
14112   if (TARGET_SUN_TLS && !TARGET_64BIT
14113       && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
14114       && model == TLS_MODEL_LOCAL_DYNAMIC)
14115     model = TLS_MODEL_GLOBAL_DYNAMIC;
14116 
14117   switch (model)
14118     {
14119     case TLS_MODEL_GLOBAL_DYNAMIC:
14120       dest = gen_reg_rtx (Pmode);
14121 
14122       if (!TARGET_64BIT)
14123 	{
14124 	  if (flag_pic && !TARGET_PECOFF)
14125 	    pic = pic_offset_table_rtx;
14126 	  else
14127 	    {
14128 	      pic = gen_reg_rtx (Pmode);
14129 	      emit_insn (gen_set_got (pic));
14130 	    }
14131 	}
14132 
14133       if (TARGET_GNU2_TLS)
14134 	{
14135 	  if (TARGET_64BIT)
14136 	    emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
14137 	  else
14138 	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
14139 
14140 	  tp = get_thread_pointer (Pmode, true);
14141 	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
14142 
14143 	  if (GET_MODE (x) != Pmode)
14144 	    x = gen_rtx_ZERO_EXTEND (Pmode, x);
14145 
14146 	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14147 	}
14148       else
14149 	{
14150 	  rtx caddr = ix86_tls_get_addr ();
14151 
14152 	  if (TARGET_64BIT)
14153 	    {
14154 	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
14155 	      rtx_insn *insns;
14156 
14157 	      start_sequence ();
14158 	      emit_call_insn
14159 		(ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
14160 	      insns = get_insns ();
14161 	      end_sequence ();
14162 
14163 	      if (GET_MODE (x) != Pmode)
14164 		x = gen_rtx_ZERO_EXTEND (Pmode, x);
14165 
14166 	      RTL_CONST_CALL_P (insns) = 1;
14167 	      emit_libcall_block (insns, dest, rax, x);
14168 	    }
14169 	  else
14170 	    emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
14171 	}
14172       break;
14173 
14174     case TLS_MODEL_LOCAL_DYNAMIC:
14175       base = gen_reg_rtx (Pmode);
14176 
14177       if (!TARGET_64BIT)
14178 	{
14179 	  if (flag_pic)
14180 	    pic = pic_offset_table_rtx;
14181 	  else
14182 	    {
14183 	      pic = gen_reg_rtx (Pmode);
14184 	      emit_insn (gen_set_got (pic));
14185 	    }
14186 	}
14187 
14188       if (TARGET_GNU2_TLS)
14189 	{
14190 	  rtx tmp = ix86_tls_module_base ();
14191 
14192 	  if (TARGET_64BIT)
14193 	    emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
14194 	  else
14195 	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
14196 
14197 	  tp = get_thread_pointer (Pmode, true);
14198 	  set_unique_reg_note (get_last_insn (), REG_EQUAL,
14199 			       gen_rtx_MINUS (Pmode, tmp, tp));
14200 	}
14201       else
14202 	{
14203 	  rtx caddr = ix86_tls_get_addr ();
14204 
14205 	  if (TARGET_64BIT)
14206 	    {
14207 	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
14208 	      rtx_insn *insns;
14209 	      rtx eqv;
14210 
14211 	      start_sequence ();
14212 	      emit_call_insn
14213 		(ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
14214 	      insns = get_insns ();
14215 	      end_sequence ();
14216 
14217 	      /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
14218 		 share the LD_BASE result with other LD model accesses.  */
14219 	      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
14220 				    UNSPEC_TLS_LD_BASE);
14221 
14222 	      RTL_CONST_CALL_P (insns) = 1;
14223 	      emit_libcall_block (insns, base, rax, eqv);
14224 	    }
14225 	  else
14226 	    emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
14227 	}
14228 
14229       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
14230       off = gen_rtx_CONST (Pmode, off);
14231 
14232       dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
14233 
14234       if (TARGET_GNU2_TLS)
14235 	{
14236 	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
14237 
14238 	  if (GET_MODE (x) != Pmode)
14239 	    x = gen_rtx_ZERO_EXTEND (Pmode, x);
14240 
14241 	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14242 	}
14243       break;
14244 
14245     case TLS_MODEL_INITIAL_EXEC:
14246       if (TARGET_64BIT)
14247 	{
14248 	  if (TARGET_SUN_TLS && !TARGET_X32)
14249 	    {
14250 	      /* The Sun linker took the AMD64 TLS spec literally
14251 		 and can only handle %rax as the destination of the
14252 		 initial-exec code sequence.  */
14253 
14254 	      dest = gen_reg_rtx (DImode);
14255 	      emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14256 	      return dest;
14257 	    }
14258 
14259 	  /* Generate DImode references to avoid %fs:(%reg32)
14260 	     problems and linker IE->LE relaxation bug.  */
14261 	  tp_mode = DImode;
14262 	  pic = NULL;
14263 	  type = UNSPEC_GOTNTPOFF;
14264 	}
14265       else if (flag_pic)
14266 	{
14267 	  set_pic_reg_ever_live ();
14268 	  pic = pic_offset_table_rtx;
14269 	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14270 	}
14271       else if (!TARGET_ANY_GNU_TLS)
14272 	{
14273 	  pic = gen_reg_rtx (Pmode);
14274 	  emit_insn (gen_set_got (pic));
14275 	  type = UNSPEC_GOTTPOFF;
14276 	}
14277       else
14278 	{
14279 	  pic = NULL;
14280 	  type = UNSPEC_INDNTPOFF;
14281 	}
14282 
14283       off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14284       off = gen_rtx_CONST (tp_mode, off);
14285       if (pic)
14286 	off = gen_rtx_PLUS (tp_mode, pic, off);
14287       off = gen_const_mem (tp_mode, off);
14288       set_mem_alias_set (off, ix86_GOT_alias_set ());
14289 
14290       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14291 	{
14292 	  base = get_thread_pointer (tp_mode,
14293 				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14294 	  off = force_reg (tp_mode, off);
14295 	  return gen_rtx_PLUS (tp_mode, base, off);
14296 	}
14297       else
14298 	{
14299 	  base = get_thread_pointer (Pmode, true);
14300 	  dest = gen_reg_rtx (Pmode);
14301 	  emit_insn (ix86_gen_sub3 (dest, base, off));
14302 	}
14303       break;
14304 
14305     case TLS_MODEL_LOCAL_EXEC:
14306       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14307 			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14308 			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14309       off = gen_rtx_CONST (Pmode, off);
14310 
14311       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14312 	{
14313 	  base = get_thread_pointer (Pmode,
14314 				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14315 	  return gen_rtx_PLUS (Pmode, base, off);
14316 	}
14317       else
14318 	{
14319 	  base = get_thread_pointer (Pmode, true);
14320 	  dest = gen_reg_rtx (Pmode);
14321 	  emit_insn (ix86_gen_sub3 (dest, base, off));
14322 	}
14323       break;
14324 
14325     default:
14326       gcc_unreachable ();
14327     }
14328 
14329   return dest;
14330 }
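
/* For reference, typical ia32 GNU TLS sequences corresponding to the
   models handled above (illustrative only; the exact code depends on
   the flags checked in legitimize_tls_address):

     local-exec:	movl	%gs:0, %eax
			leal	x@ntpoff(%eax), %eax

     initial-exec:	movl	%gs:0, %eax
			addl	x@gotntpoff(%ebx), %eax

     global-dynamic:	leal	x@tlsgd(,%ebx,1), %eax
			call	___tls_get_addr  */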
14331 
14332 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14333    to symbol DECL if BEIMPORT is true.  Otherwise create or return the
14334    unique refptr-DECL symbol corresponding to symbol DECL.  */
14335 
14336 struct dllimport_hasher : ggc_cache_hasher<tree_map *>
14337 {
14338   static inline hashval_t hash (tree_map *m) { return m->hash; }
14339   static inline bool
14340   equal (tree_map *a, tree_map *b)
14341   {
14342     return a->base.from == b->base.from;
14343   }
14344 
14345   static void
14346   handle_cache_entry (tree_map *&m)
14347   {
14348     extern void gt_ggc_mx (tree_map *&);
14349     if (m == HTAB_EMPTY_ENTRY || m == HTAB_DELETED_ENTRY)
14350       return;
14351     else if (ggc_marked_p (m->base.from))
14352       gt_ggc_mx (m);
14353     else
14354       m = static_cast<tree_map *> (HTAB_DELETED_ENTRY);
14355   }
14356 };
14357 
14358 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
14359 
14360 static tree
14361 get_dllimport_decl (tree decl, bool beimport)
14362 {
14363   struct tree_map *h, in;
14364   const char *name;
14365   const char *prefix;
14366   size_t namelen, prefixlen;
14367   char *imp_name;
14368   tree to;
14369   rtx rtl;
14370 
14371   if (!dllimport_map)
14372     dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14373 
14374   in.hash = htab_hash_pointer (decl);
14375   in.base.from = decl;
14376   tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14377   h = *loc;
14378   if (h)
14379     return h->to;
14380 
14381   *loc = h = ggc_alloc<tree_map> ();
14382   h->hash = in.hash;
14383   h->base.from = decl;
14384   h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14385 			   VAR_DECL, NULL, ptr_type_node);
14386   DECL_ARTIFICIAL (to) = 1;
14387   DECL_IGNORED_P (to) = 1;
14388   DECL_EXTERNAL (to) = 1;
14389   TREE_READONLY (to) = 1;
14390 
14391   name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14392   name = targetm.strip_name_encoding (name);
14393   if (beimport)
14394     prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14395       ? "*__imp_" : "*__imp__";
14396   else
14397     prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14398   namelen = strlen (name);
14399   prefixlen = strlen (prefix);
14400   imp_name = (char *) alloca (namelen + prefixlen + 1);
14401   memcpy (imp_name, prefix, prefixlen);
14402   memcpy (imp_name + prefixlen, name, namelen + 1);
14403 
14404   name = ggc_alloc_string (imp_name, namelen + prefixlen);
14405   rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14406   SET_SYMBOL_REF_DECL (rtl, to);
14407   SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14408   if (!beimport)
14409     {
14410       SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14411 #ifdef SUB_TARGET_RECORD_STUB
14412       SUB_TARGET_RECORD_STUB (name);
14413 #endif
14414     }
14415 
14416   rtl = gen_const_mem (Pmode, rtl);
14417   set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14418 
14419   SET_DECL_RTL (to, rtl);
14420   SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14421 
14422   return to;
14423 }
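
/* E.g. for a dllimported "foo" this creates a reference through
   "__imp_foo" on targets with an empty user label prefix (64-bit
   mingw) and through "__imp__foo" where the prefix is "_" (32-bit
   mingw); the leading "*" marks the name as already fully prefixed.  */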
14424 
14425 /* Expand SYMBOL into its corresponding far-address symbol.
14426    WANT_REG is true if we require the result to be a register.  */
14427 
14428 static rtx
14429 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14430 {
14431   tree imp_decl;
14432   rtx x;
14433 
14434   gcc_assert (SYMBOL_REF_DECL (symbol));
14435   imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14436 
14437   x = DECL_RTL (imp_decl);
14438   if (want_reg)
14439     x = force_reg (Pmode, x);
14440   return x;
14441 }
14442 
14443 /* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
14444    true if we require the result to be a register.  */
14445 
14446 static rtx
14447 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14448 {
14449   tree imp_decl;
14450   rtx x;
14451 
14452   gcc_assert (SYMBOL_REF_DECL (symbol));
14453   imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14454 
14455   x = DECL_RTL (imp_decl);
14456   if (want_reg)
14457     x = force_reg (Pmode, x);
14458   return x;
14459 }
14460 
14461 /* Expand ADDR into its corresponding dllimport or refptr symbol.  INREG
14462    is true if we require the result to be a register.  */
14463 
14464 static rtx
14465 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14466 {
14467   if (!TARGET_PECOFF)
14468     return NULL_RTX;
14469 
14470   if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14471     {
14472       if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14473 	return legitimize_dllimport_symbol (addr, inreg);
14474       if (GET_CODE (addr) == CONST
14475 	  && GET_CODE (XEXP (addr, 0)) == PLUS
14476 	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14477 	  && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14478 	{
14479 	  rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14480 	  return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14481 	}
14482     }
14483 
14484   if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14485     return NULL_RTX;
14486   if (GET_CODE (addr) == SYMBOL_REF
14487       && !is_imported_p (addr)
14488       && SYMBOL_REF_EXTERNAL_P (addr)
14489       && SYMBOL_REF_DECL (addr))
14490     return legitimize_pe_coff_extern_decl (addr, inreg);
14491 
14492   if (GET_CODE (addr) == CONST
14493       && GET_CODE (XEXP (addr, 0)) == PLUS
14494       && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14495       && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14496       && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14497       && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14498     {
14499       rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14500       return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14501     }
14502   return NULL_RTX;
14503 }
14504 
14505 /* Try machine-dependent ways of modifying an illegitimate address
14506    to be legitimate.  If we find one, return the new, valid address.
14507    This macro is used in only one place: `memory_address' in explow.c.
14508 
14509    OLDX is the address as it was before break_out_memory_refs was called.
14510    In some cases it is useful to look at this to decide what needs to be done.
14511 
14512    It is always safe for this macro to do nothing.  It exists to recognize
14513    opportunities to optimize the output.
14514 
14515    For the 80386, we handle X+REG by loading X into a register R and
14516    using R+REG.  R will go in a general reg and indexing will be used.
14517    However, if REG is a broken-out memory address or multiplication,
14518    nothing needs to be done because REG can certainly go in a general reg.
14519 
14520    When -fpic is used, special handling is needed for symbolic references.
14521    See comments by legitimize_pic_address in i386.c for details.  */
14522 
14523 static rtx
14524 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14525 {
14526   bool changed = false;
14527   unsigned log;
14528 
14529   log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14530   if (log)
14531     return legitimize_tls_address (x, (enum tls_model) log, false);
14532   if (GET_CODE (x) == CONST
14533       && GET_CODE (XEXP (x, 0)) == PLUS
14534       && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14535       && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14536     {
14537       rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14538 				      (enum tls_model) log, false);
14539       return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14540     }
14541 
14542   if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14543     {
14544       rtx tmp = legitimize_pe_coff_symbol (x, true);
14545       if (tmp)
14546         return tmp;
14547     }
14548 
14549   if (flag_pic && SYMBOLIC_CONST (x))
14550     return legitimize_pic_address (x, 0);
14551 
14552 #if TARGET_MACHO
14553   if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14554     return machopic_indirect_data_reference (x, 0);
14555 #endif
14556 
14557   /* Canonicalize shifts by 0, 1, 2, 3 into a multiply.  */
14558   if (GET_CODE (x) == ASHIFT
14559       && CONST_INT_P (XEXP (x, 1))
14560       && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14561     {
14562       changed = true;
14563       log = INTVAL (XEXP (x, 1));
14564       x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14565 			GEN_INT (1 << log));
14566     }
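
  /* E.g. (ashift (reg) (const_int 2)) becomes (mult (reg) (const_int 4)),
     the form the address decomposition above expects for a scaled
     index.  */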
14567 
14568   if (GET_CODE (x) == PLUS)
14569     {
14570       /* Canonicalize shifts by 0, 1, 2, 3 into a multiply.  */
14571 
14572       if (GET_CODE (XEXP (x, 0)) == ASHIFT
14573 	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14574 	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14575 	{
14576 	  changed = true;
14577 	  log = INTVAL (XEXP (XEXP (x, 0), 1));
14578 	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
14579 				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14580 				      GEN_INT (1 << log));
14581 	}
14582 
14583       if (GET_CODE (XEXP (x, 1)) == ASHIFT
14584 	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14585 	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14586 	{
14587 	  changed = true;
14588 	  log = INTVAL (XEXP (XEXP (x, 1), 1));
14589 	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
14590 				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14591 				      GEN_INT (1 << log));
14592 	}
14593 
14594       /* Put multiply first if it isn't already.  */
14595       if (GET_CODE (XEXP (x, 1)) == MULT)
14596 	{
14597 	  std::swap (XEXP (x, 0), XEXP (x, 1));
14598 	  changed = true;
14599 	}
14600 
14601       /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14602 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
14603 	 created by virtual register instantiation, register elimination, and
14604 	 similar optimizations.  */
14605       if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14606 	{
14607 	  changed = true;
14608 	  x = gen_rtx_PLUS (Pmode,
14609 			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
14610 					  XEXP (XEXP (x, 1), 0)),
14611 			    XEXP (XEXP (x, 1), 1));
14612 	}
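
      /* I.e. (plus (mult %reg1 4) (plus %reg2 disp)) is rewritten as
	 (plus (plus (mult %reg1 4) %reg2) disp), which matches the
	 base + index*scale + disp shape directly.  */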
14613 
14614       /* Canonicalize
14615 	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14616 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
14617       else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14618 	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14619 	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14620 	       && CONSTANT_P (XEXP (x, 1)))
14621 	{
14622 	  rtx constant;
14623 	  rtx other = NULL_RTX;
14624 
14625 	  if (CONST_INT_P (XEXP (x, 1)))
14626 	    {
14627 	      constant = XEXP (x, 1);
14628 	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14629 	    }
14630 	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14631 	    {
14632 	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14633 	      other = XEXP (x, 1);
14634 	    }
14635 	  else
14636 	    constant = 0;
14637 
14638 	  if (constant)
14639 	    {
14640 	      changed = true;
14641 	      x = gen_rtx_PLUS (Pmode,
14642 				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14643 					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
14644 				plus_constant (Pmode, other,
14645 					       INTVAL (constant)));
14646 	    }
14647 	}
14648 
14649       if (changed && ix86_legitimate_address_p (mode, x, false))
14650 	return x;
14651 
14652       if (GET_CODE (XEXP (x, 0)) == MULT)
14653 	{
14654 	  changed = true;
14655 	  XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14656 	}
14657 
14658       if (GET_CODE (XEXP (x, 1)) == MULT)
14659 	{
14660 	  changed = true;
14661 	  XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14662 	}
14663 
14664       if (changed
14665 	  && REG_P (XEXP (x, 1))
14666 	  && REG_P (XEXP (x, 0)))
14667 	return x;
14668 
14669       if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14670 	{
14671 	  changed = true;
14672 	  x = legitimize_pic_address (x, 0);
14673 	}
14674 
14675       if (changed && ix86_legitimate_address_p (mode, x, false))
14676 	return x;
14677 
14678       if (REG_P (XEXP (x, 0)))
14679 	{
14680 	  rtx temp = gen_reg_rtx (Pmode);
14681 	  rtx val  = force_operand (XEXP (x, 1), temp);
14682 	  if (val != temp)
14683 	    {
14684 	      val = convert_to_mode (Pmode, val, 1);
14685 	      emit_move_insn (temp, val);
14686 	    }
14687 
14688 	  XEXP (x, 1) = temp;
14689 	  return x;
14690 	}
14691 
14692       else if (REG_P (XEXP (x, 1)))
14693 	{
14694 	  rtx temp = gen_reg_rtx (Pmode);
14695 	  rtx val  = force_operand (XEXP (x, 0), temp);
14696 	  if (val != temp)
14697 	    {
14698 	      val = convert_to_mode (Pmode, val, 1);
14699 	      emit_move_insn (temp, val);
14700 	    }
14701 
14702 	  XEXP (x, 0) = temp;
14703 	  return x;
14704 	}
14705     }
14706 
14707   return x;
14708 }
14709 
14710 /* Print an integer constant expression in assembler syntax.  Addition
14711    and subtraction are the only arithmetic that may appear in these
14712    expressions.  FILE is the stdio stream to write to, X is the rtx, and
14713    CODE is the operand print code from the output string.  */
14714 
14715 static void
14716 output_pic_addr_const (FILE *file, rtx x, int code)
14717 {
14718   char buf[256];
14719 
14720   switch (GET_CODE (x))
14721     {
14722     case PC:
14723       gcc_assert (flag_pic);
14724       putc ('.', file);
14725       break;
14726 
14727     case SYMBOL_REF:
14728       if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14729 	output_addr_const (file, x);
14730       else
14731 	{
14732 	  const char *name = XSTR (x, 0);
14733 
14734 	  /* Mark the decl as referenced so that cgraph will
14735 	     output the function.  */
14736 	  if (SYMBOL_REF_DECL (x))
14737 	    mark_decl_referenced (SYMBOL_REF_DECL (x));
14738 
14739 #if TARGET_MACHO
14740 	  if (MACHOPIC_INDIRECT
14741 	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14742 	    name = machopic_indirection_name (x, /*stub_p=*/true);
14743 #endif
14744 	  assemble_name (file, name);
14745 	}
14746       if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14747 	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14748 	fputs ("@PLT", file);
14749       break;
14750 
14751     case LABEL_REF:
14752       x = XEXP (x, 0);
14753       /* FALLTHRU */
14754     case CODE_LABEL:
14755       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14756       assemble_name (asm_out_file, buf);
14757       break;
14758 
14759     case CONST_INT:
14760       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14761       break;
14762 
14763     case CONST:
14764       /* This used to output parentheses around the expression,
14765 	 but that does not work on the 386 (either ATT or BSD assembler).  */
14766       output_pic_addr_const (file, XEXP (x, 0), code);
14767       break;
14768 
14769     case CONST_DOUBLE:
14770       if (GET_MODE (x) == VOIDmode)
14771 	{
14772 	  /* We can use %d if the number is <32 bits and positive.  */
14773 	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14774 	    fprintf (file, "0x%lx%08lx",
14775 		     (unsigned long) CONST_DOUBLE_HIGH (x),
14776 		     (unsigned long) CONST_DOUBLE_LOW (x));
14777 	  else
14778 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14779 	}
14780       else
14781 	/* We can't handle floating point constants;
14782 	   TARGET_PRINT_OPERAND must handle them.  */
14783 	output_operand_lossage ("floating constant misused");
14784       break;
14785 
14786     case PLUS:
14787       /* Some assemblers need integer constants to appear first.  */
14788       if (CONST_INT_P (XEXP (x, 0)))
14789 	{
14790 	  output_pic_addr_const (file, XEXP (x, 0), code);
14791 	  putc ('+', file);
14792 	  output_pic_addr_const (file, XEXP (x, 1), code);
14793 	}
14794       else
14795 	{
14796 	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
14797 	  output_pic_addr_const (file, XEXP (x, 1), code);
14798 	  putc ('+', file);
14799 	  output_pic_addr_const (file, XEXP (x, 0), code);
14800 	}
14801       break;
14802 
14803     case MINUS:
14804       if (!TARGET_MACHO)
14805 	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14806       output_pic_addr_const (file, XEXP (x, 0), code);
14807       putc ('-', file);
14808       output_pic_addr_const (file, XEXP (x, 1), code);
14809       if (!TARGET_MACHO)
14810 	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14811       break;
14812 
14813     case UNSPEC:
14814       if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14815 	{
14816 	  bool f = i386_asm_output_addr_const_extra (file, x);
14817 	  gcc_assert (f);
14818 	  break;
14819 	}
14820 
14821       gcc_assert (XVECLEN (x, 0) == 1);
14822       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14823       switch (XINT (x, 1))
14824 	{
14825 	case UNSPEC_GOT:
14826 	  fputs ("@GOT", file);
14827 	  break;
14828 	case UNSPEC_GOTOFF:
14829 	  fputs ("@GOTOFF", file);
14830 	  break;
14831 	case UNSPEC_PLTOFF:
14832 	  fputs ("@PLTOFF", file);
14833 	  break;
14834 	case UNSPEC_PCREL:
14835 	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14836 		 "(%rip)" : "[rip]", file);
14837 	  break;
14838 	case UNSPEC_GOTPCREL:
14839 	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14840 		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14841 	  break;
14842 	case UNSPEC_GOTTPOFF:
14843 	  /* FIXME: This might be @TPOFF in Sun ld too.  */
14844 	  fputs ("@gottpoff", file);
14845 	  break;
14846 	case UNSPEC_TPOFF:
14847 	  fputs ("@tpoff", file);
14848 	  break;
14849 	case UNSPEC_NTPOFF:
14850 	  if (TARGET_64BIT)
14851 	    fputs ("@tpoff", file);
14852 	  else
14853 	    fputs ("@ntpoff", file);
14854 	  break;
14855 	case UNSPEC_DTPOFF:
14856 	  fputs ("@dtpoff", file);
14857 	  break;
14858 	case UNSPEC_GOTNTPOFF:
14859 	  if (TARGET_64BIT)
14860 	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14861 		   "@gottpoff(%rip)": "@gottpoff[rip]", file);
14862 	  else
14863 	    fputs ("@gotntpoff", file);
14864 	  break;
14865 	case UNSPEC_INDNTPOFF:
14866 	  fputs ("@indntpoff", file);
14867 	  break;
14868 #if TARGET_MACHO
14869 	case UNSPEC_MACHOPIC_OFFSET:
14870 	  putc ('-', file);
14871 	  machopic_output_function_base_name (file);
14872 	  break;
14873 #endif
14874 	default:
14875 	  output_operand_lossage ("invalid UNSPEC as operand");
14876 	  break;
14877 	}
14878       break;
14879 
14880     default:
14881       output_operand_lossage ("invalid expression as operand");
14882     }
14883 }
14884 
14885 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14886    We need to emit DTP-relative relocations.  */
14887 
14888 static void ATTRIBUTE_UNUSED
14889 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14890 {
14891   fputs (ASM_LONG, file);
14892   output_addr_const (file, x);
14893   fputs ("@dtpoff", file);
14894   switch (size)
14895     {
14896     case 4:
14897       break;
14898     case 8:
14899       fputs (", 0", file);
14900       break;
14901     default:
14902       gcc_unreachable ();
14903    }
14904 }
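
/* E.g. for SIZE == 4 this emits ".long x@dtpoff", and for SIZE == 8 it
   emits ".long x@dtpoff, 0", relying on the relocation only filling in
   the low 32 bits.  */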
14905 
14906 /* Return true if X is a representation of the PIC register.  This copes
14907    with calls from ix86_find_base_term, where the register might have
14908    been replaced by a cselib value.  */
14909 
14910 static bool
14911 ix86_pic_register_p (rtx x)
14912 {
14913   if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14914     return (pic_offset_table_rtx
14915 	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14916   else if (!REG_P (x))
14917     return false;
14918   else if (pic_offset_table_rtx)
14919     {
14920       if (REGNO (x) == REGNO (pic_offset_table_rtx))
14921 	return true;
14922       if (HARD_REGISTER_P (x)
14923 	  && !HARD_REGISTER_P (pic_offset_table_rtx)
14924 	  && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14925 	return true;
14926       return false;
14927     }
14928   else
14929     return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14930 }
14931 
14932 /* Helper function for ix86_delegitimize_address.
14933    Attempt to delegitimize TLS local-exec accesses.  */
14934 
14935 static rtx
14936 ix86_delegitimize_tls_address (rtx orig_x)
14937 {
14938   rtx x = orig_x, unspec;
14939   struct ix86_address addr;
14940 
14941   if (!TARGET_TLS_DIRECT_SEG_REFS)
14942     return orig_x;
14943   if (MEM_P (x))
14944     x = XEXP (x, 0);
14945   if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14946     return orig_x;
14947   if (ix86_decompose_address (x, &addr) == 0
14948       || addr.seg != DEFAULT_TLS_SEG_REG
14949       || addr.disp == NULL_RTX
14950       || GET_CODE (addr.disp) != CONST)
14951     return orig_x;
14952   unspec = XEXP (addr.disp, 0);
14953   if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14954     unspec = XEXP (unspec, 0);
14955   if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14956     return orig_x;
14957   x = XVECEXP (unspec, 0, 0);
14958   gcc_assert (GET_CODE (x) == SYMBOL_REF);
14959   if (unspec != XEXP (addr.disp, 0))
14960     x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14961   if (addr.index)
14962     {
14963       rtx idx = addr.index;
14964       if (addr.scale != 1)
14965 	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14966       x = gen_rtx_PLUS (Pmode, idx, x);
14967     }
14968   if (addr.base)
14969     x = gen_rtx_PLUS (Pmode, addr.base, x);
14970   if (MEM_P (orig_x))
14971     x = replace_equiv_address_nv (orig_x, x);
14972   return x;
14973 }
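
/* E.g. a TLS local-exec access of the form %gs:x@ntpoff(%reg) is
   rewritten back to the plain (plus (reg) (symbol_ref x)) form that
   debug-info consumers expect.  */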
14974 
14975 /* In the name of slightly smaller debug output, and to cater to
14976    general assembler lossage, recognize PIC+GOTOFF and turn it back
14977    into a direct symbol reference.
14978 
14979    On Darwin, this is necessary to avoid a crash, because Darwin
14980    has a different PIC label for each routine but the DWARF debugging
14981    information is not associated with any particular routine, so it's
14982    necessary to remove references to the PIC label from RTL stored by
14983    the DWARF output code.  */
14984 
14985 static rtx
14986 ix86_delegitimize_address (rtx x)
14987 {
14988   rtx orig_x = delegitimize_mem_from_attrs (x);
14989   /* addend is NULL or some rtx if x is something+GOTOFF where
14990      something doesn't include the PIC register.  */
14991   rtx addend = NULL_RTX;
14992   /* reg_addend is NULL or a multiple of some register.  */
14993   rtx reg_addend = NULL_RTX;
14994   /* const_addend is NULL or a const_int.  */
14995   rtx const_addend = NULL_RTX;
14996   /* This is the result, or NULL.  */
14997   rtx result = NULL_RTX;
14998 
14999   x = orig_x;
15000 
15001   if (MEM_P (x))
15002     x = XEXP (x, 0);
15003 
15004   if (TARGET_64BIT)
15005     {
15006       if (GET_CODE (x) == CONST
15007           && GET_CODE (XEXP (x, 0)) == PLUS
15008           && GET_MODE (XEXP (x, 0)) == Pmode
15009           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
15010           && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
15011           && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
15012         {
15013 	  rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
15014 	  x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
15015 	  if (MEM_P (orig_x))
15016 	    x = replace_equiv_address_nv (orig_x, x);
15017 	  return x;
15018 	}
15019 
15020       if (GET_CODE (x) == CONST
15021 	  && GET_CODE (XEXP (x, 0)) == UNSPEC
15022 	  && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
15023 	      || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
15024 	  && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
15025 	{
15026 	  x = XVECEXP (XEXP (x, 0), 0, 0);
15027 	  if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
15028 	    {
15029 	      x = simplify_gen_subreg (GET_MODE (orig_x), x,
15030 				       GET_MODE (x), 0);
15031 	      if (x == NULL_RTX)
15032 		return orig_x;
15033 	    }
15034 	  return x;
15035 	}
15036 
15037       if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
15038 	return ix86_delegitimize_tls_address (orig_x);
15039 
15040       /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
15041 	 and -mcmodel=medium -fpic.  */
15042     }
15043 
15044   if (GET_CODE (x) != PLUS
15045       || GET_CODE (XEXP (x, 1)) != CONST)
15046     return ix86_delegitimize_tls_address (orig_x);
15047 
15048   if (ix86_pic_register_p (XEXP (x, 0)))
15049     /* %ebx + GOT/GOTOFF */
15050     ;
15051   else if (GET_CODE (XEXP (x, 0)) == PLUS)
15052     {
15053       /* %ebx + %reg * scale + GOT/GOTOFF */
15054       reg_addend = XEXP (x, 0);
15055       if (ix86_pic_register_p (XEXP (reg_addend, 0)))
15056 	reg_addend = XEXP (reg_addend, 1);
15057       else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
15058 	reg_addend = XEXP (reg_addend, 0);
15059       else
15060 	{
15061 	  reg_addend = NULL_RTX;
15062 	  addend = XEXP (x, 0);
15063 	}
15064     }
15065   else
15066     addend = XEXP (x, 0);
15067 
15068   x = XEXP (XEXP (x, 1), 0);
15069   if (GET_CODE (x) == PLUS
15070       && CONST_INT_P (XEXP (x, 1)))
15071     {
15072       const_addend = XEXP (x, 1);
15073       x = XEXP (x, 0);
15074     }
15075 
15076   if (GET_CODE (x) == UNSPEC
15077       && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
15078 	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
15079 	  || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
15080 	      && !MEM_P (orig_x) && !addend)))
15081     result = XVECEXP (x, 0, 0);
15082 
15083   if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
15084       && !MEM_P (orig_x))
15085     result = XVECEXP (x, 0, 0);
15086 
15087   if (! result)
15088     return ix86_delegitimize_tls_address (orig_x);
15089 
15090   if (const_addend)
15091     result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
15092   if (reg_addend)
15093     result = gen_rtx_PLUS (Pmode, reg_addend, result);
15094   if (addend)
15095     {
15096       /* If the rest of original X doesn't involve the PIC register, add
15097 	 addend and subtract pic_offset_table_rtx.  This can happen e.g.
15098 	 for code like:
15099 	 leal (%ebx, %ecx, 4), %ecx
15100 	 ...
15101 	 movl foo@GOTOFF(%ecx), %edx
15102 	 in which case we return (%ecx - %ebx) + foo
15103 	 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
15104 	 and reload has completed.  */
15105       if (pic_offset_table_rtx
15106 	  && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
15107         result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
15108 						     pic_offset_table_rtx),
15109 			       result);
15110       else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
15111 	{
15112 	  rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
15113 	  tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
15114 	  result = gen_rtx_PLUS (Pmode, tmp, result);
15115 	}
15116       else
15117 	return orig_x;
15118     }
15119   if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
15120     {
15121       result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
15122       if (result == NULL_RTX)
15123 	return orig_x;
15124     }
15125   return result;
15126 }
15127 
15128 /* If X is a machine specific address (i.e. a symbol or label being
15129    referenced as a displacement from the GOT implemented using an
15130    UNSPEC), then return the base term.  Otherwise return X.  */
15131 
15132 rtx
15133 ix86_find_base_term (rtx x)
15134 {
15135   rtx term;
15136 
15137   if (TARGET_64BIT)
15138     {
15139       if (GET_CODE (x) != CONST)
15140 	return x;
15141       term = XEXP (x, 0);
15142       if (GET_CODE (term) == PLUS
15143 	  && (CONST_INT_P (XEXP (term, 1))
15144 	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
15145 	term = XEXP (term, 0);
15146       if (GET_CODE (term) != UNSPEC
15147 	  || (XINT (term, 1) != UNSPEC_GOTPCREL
15148 	      && XINT (term, 1) != UNSPEC_PCREL))
15149 	return x;
15150 
15151       return XVECEXP (term, 0, 0);
15152     }
15153 
15154   return ix86_delegitimize_address (x);
15155 }
15156 
15157 static void
15158 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
15159 		    bool fp, FILE *file)
15160 {
15161   const char *suffix;
15162 
15163   if (mode == CCFPmode || mode == CCFPUmode)
15164     {
15165       code = ix86_fp_compare_code_to_integer (code);
15166       mode = CCmode;
15167     }
15168   if (reverse)
15169     code = reverse_condition (code);
15170 
15171   switch (code)
15172     {
15173     case EQ:
15174       switch (mode)
15175 	{
15176 	case CCAmode:
15177 	  suffix = "a";
15178 	  break;
15179 
15180 	case CCCmode:
15181 	  suffix = "c";
15182 	  break;
15183 
15184 	case CCOmode:
15185 	  suffix = "o";
15186 	  break;
15187 
15188 	case CCSmode:
15189 	  suffix = "s";
15190 	  break;
15191 
15192 	default:
15193 	  suffix = "e";
15194 	}
15195       break;
15196     case NE:
15197       switch (mode)
15198 	{
15199 	case CCAmode:
15200 	  suffix = "na";
15201 	  break;
15202 
15203 	case CCCmode:
15204 	  suffix = "nc";
15205 	  break;
15206 
15207 	case CCOmode:
15208 	  suffix = "no";
15209 	  break;
15210 
15211 	case CCSmode:
15212 	  suffix = "ns";
15213 	  break;
15214 
15215 	default:
15216 	  suffix = "ne";
15217 	}
15218       break;
15219     case GT:
15220       gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
15221       suffix = "g";
15222       break;
15223     case GTU:
15224       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
15225 	 Those same assemblers have the same but opposite lossage on cmov.  */
15226       if (mode == CCmode)
15227 	suffix = fp ? "nbe" : "a";
15228       else
15229 	gcc_unreachable ();
15230       break;
15231     case LT:
15232       switch (mode)
15233 	{
15234 	case CCNOmode:
15235 	case CCGOCmode:
15236 	  suffix = "s";
15237 	  break;
15238 
15239 	case CCmode:
15240 	case CCGCmode:
15241 	  suffix = "l";
15242 	  break;
15243 
15244 	default:
15245 	  gcc_unreachable ();
15246 	}
15247       break;
15248     case LTU:
15249       if (mode == CCmode)
15250 	suffix = "b";
15251       else if (mode == CCCmode)
15252 	suffix = fp ? "b" : "c";
15253       else
15254 	gcc_unreachable ();
15255       break;
15256     case GE:
15257       switch (mode)
15258 	{
15259 	case CCNOmode:
15260 	case CCGOCmode:
15261 	  suffix = "ns";
15262 	  break;
15263 
15264 	case CCmode:
15265 	case CCGCmode:
15266 	  suffix = "ge";
15267 	  break;
15268 
15269 	default:
15270 	  gcc_unreachable ();
15271 	}
15272       break;
15273     case GEU:
15274       if (mode == CCmode)
15275 	suffix = "nb";
15276       else if (mode == CCCmode)
15277 	suffix = fp ? "nb" : "nc";
15278       else
15279 	gcc_unreachable ();
15280       break;
15281     case LE:
15282       gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15283       suffix = "le";
15284       break;
15285     case LEU:
15286       if (mode == CCmode)
15287 	suffix = "be";
15288       else
15289 	gcc_unreachable ();
15290       break;
15291     case UNORDERED:
15292       suffix = fp ? "u" : "p";
15293       break;
15294     case ORDERED:
15295       suffix = fp ? "nu" : "np";
15296       break;
15297     default:
15298       gcc_unreachable ();
15299     }
15300   fputs (suffix, file);
15301 }
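
/* E.g. an unsigned greater-than in plain CCmode yields the suffix "a",
   so a "set" emitter produces "seta" and a conditional move "cmova";
   with REVERSE set, the same comparison comes out as "be".  */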
15302 
15303 /* Print the name of register X to FILE based on its machine mode and number.
15304    If CODE is 'w', pretend the mode is HImode.
15305    If CODE is 'b', pretend the mode is QImode.
15306    If CODE is 'k', pretend the mode is SImode.
15307    If CODE is 'q', pretend the mode is DImode.
15308    If CODE is 'x', pretend the mode is V4SFmode.
15309    If CODE is 't', pretend the mode is V8SFmode.
15310    If CODE is 'g', pretend the mode is V16SFmode.
15311    If CODE is 'h', pretend the reg is the 'high' byte register.
15312    If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
15313    If CODE is 'd', duplicate the operand for AVX instruction.
15314  */
15315 
15316 void
15317 print_reg (rtx x, int code, FILE *file)
15318 {
15319   const char *reg;
15320   unsigned int regno;
15321   bool duplicated = code == 'd' && TARGET_AVX;
15322 
15323   if (ASSEMBLER_DIALECT == ASM_ATT)
15324     putc ('%', file);
15325 
15326   if (x == pc_rtx)
15327     {
15328       gcc_assert (TARGET_64BIT);
15329       fputs ("rip", file);
15330       return;
15331     }
15332 
15333   regno = true_regnum (x);
15334   gcc_assert (regno != ARG_POINTER_REGNUM
15335 	      && regno != FRAME_POINTER_REGNUM
15336 	      && regno != FLAGS_REG
15337 	      && regno != FPSR_REG
15338 	      && regno != FPCR_REG);
15339 
15340   if (code == 'w' || MMX_REG_P (x))
15341     code = 2;
15342   else if (code == 'b')
15343     code = 1;
15344   else if (code == 'k')
15345     code = 4;
15346   else if (code == 'q')
15347     code = 8;
15348   else if (code == 'y')
15349     code = 3;
15350   else if (code == 'h')
15351     code = 0;
15352   else if (code == 'x')
15353     code = 16;
15354   else if (code == 't')
15355     code = 32;
15356   else if (code == 'g')
15357     code = 64;
15358   else
15359     code = GET_MODE_SIZE (GET_MODE (x));
15360 
15361   /* Irritatingly, AMD extended registers use a different naming convention
15362      from the normal registers: "r%d[bwd]".  */
15363   if (REX_INT_REGNO_P (regno))
15364     {
15365       gcc_assert (TARGET_64BIT);
15366       putc ('r', file);
15367       fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
15368       switch (code)
15369 	{
15370 	  case 0:
15371 	    error ("extended registers have no high halves");
15372 	    break;
15373 	  case 1:
15374 	    putc ('b', file);
15375 	    break;
15376 	  case 2:
15377 	    putc ('w', file);
15378 	    break;
15379 	  case 4:
15380 	    putc ('d', file);
15381 	    break;
15382 	  case 8:
15383 	    /* no suffix */
15384 	    break;
15385 	  default:
15386 	    error ("unsupported operand size for extended register");
15387 	    break;
15388 	}
15389       return;
15390     }
15391 
15392   reg = NULL;
15393   switch (code)
15394     {
15395     case 3:
15396       if (STACK_TOP_P (x))
15397 	{
15398 	  reg = "st(0)";
15399 	  break;
15400 	}
15401       /* FALLTHRU */
15402     case 8:
15403     case 4:
15404     case 12:
15405       if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x) && ! ANY_BND_REG_P (x))
15406 	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
15407       /* FALLTHRU */
15408     case 16:
15409     case 2:
15410     normal:
15411       reg = hi_reg_name[regno];
15412       break;
15413     case 1:
15414       if (regno >= ARRAY_SIZE (qi_reg_name))
15415 	goto normal;
15416       reg = qi_reg_name[regno];
15417       break;
15418     case 0:
15419       if (regno >= ARRAY_SIZE (qi_high_reg_name))
15420 	goto normal;
15421       reg = qi_high_reg_name[regno];
15422       break;
15423     case 32:
15424       if (SSE_REG_P (x))
15425 	{
15426 	  gcc_assert (!duplicated);
15427 	  putc ('y', file);
15428 	  fputs (hi_reg_name[regno] + 1, file);
15429 	  return;
15430 	}
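      /* FALLTHRU */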
15431     case 64:
15432       if (SSE_REG_P (x))
15433         {
15434           gcc_assert (!duplicated);
15435           putc ('z', file);
15436           fputs (hi_reg_name[REGNO (x)] + 1, file);
15437           return;
15438         }
15439       break;
15440     default:
15441       gcc_unreachable ();
15442     }
15443 
15444   fputs (reg, file);
15445   if (duplicated)
15446     {
15447       if (ASSEMBLER_DIALECT == ASM_ATT)
15448 	fprintf (file, ", %%%s", reg);
15449       else
15450 	fprintf (file, ", %s", reg);
15451     }
15452 }
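
/* With code 'd' on an AVX target the register is printed twice, e.g.
   "%xmm0, %xmm0", which lets a single operand fill both source slots
   of a three-operand AVX instruction.  */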
15453 
15454 /* Meaning of CODE:
15455    L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15456    C -- print opcode suffix for set/cmov insn.
15457    c -- like C, but print reversed condition
15458    F,f -- likewise, but for floating-point.
15459    O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15460 	otherwise nothing
15461    R -- print embedded rounding and sae.
15462    r -- print only sae.
15463    z -- print the opcode suffix for the size of the current operand.
15464    Z -- likewise, with special suffixes for x87 instructions.
15465    * -- print a star (in certain assembler syntax)
15466    A -- print an absolute memory reference.
15467    E -- print address with DImode register names if TARGET_64BIT.
15468    w -- print the operand as if it's a "word" (HImode) even if it isn't.
15469    s -- print a shift double count, followed by the assembler's argument
15470 	delimiter.
15471    b -- print the QImode name of the register for the indicated operand.
15472 	%b0 would print %al if operands[0] is reg 0.
15473    w --  likewise, print the HImode name of the register.
15474    k --  likewise, print the SImode name of the register.
15475    q --  likewise, print the DImode name of the register.
15476    x --  likewise, print the V4SFmode name of the register.
15477    t --  likewise, print the V8SFmode name of the register.
15478    g --  likewise, print the V16SFmode name of the register.
15479    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15480    y -- print "st(0)" instead of "st" as a register.
15481    d -- print duplicated register operand for AVX instruction.
15482    D -- print condition for SSE cmp instruction.
15483    P -- if PIC, print an @PLT suffix.
15484    p -- print raw symbol name.
15485    X -- don't print any sort of PIC '@' suffix for a symbol.
15486    & -- print some in-use local-dynamic symbol name.
15487    H -- print a memory address offset by 8; used for sse high-parts
15488    Y -- print condition for XOP pcom* instruction.
15489    + -- print a branch hint as 'cs' or 'ds' prefix
15490    ; -- print a semicolon (after prefixes due to bug in older gas).
15491    ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15492    @ -- print a segment register of thread base pointer load
15493    ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15494    ! -- print MPX prefix for jxx/call/ret instructions if required.
15495  */
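
/* For example, with operands[0] being hard register 0 (the AX family),
   "%b0" prints "%al", "%w0" prints "%ax", "%k0" prints "%eax" and, on a
   64-bit target, "%q0" prints "%rax" (AT&T syntax; Intel syntax omits
   the "%" prefix).  */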
15496 
15497 void
15498 ix86_print_operand (FILE *file, rtx x, int code)
15499 {
15500   if (code)
15501     {
15502       switch (code)
15503 	{
15504 	case 'A':
15505 	  switch (ASSEMBLER_DIALECT)
15506 	    {
15507 	    case ASM_ATT:
15508 	      putc ('*', file);
15509 	      break;
15510 
15511 	    case ASM_INTEL:
15512 	      /* Intel syntax.  For absolute addresses, registers should not
15513 		 be surrounded by brackets.  */
15514 	      if (!REG_P (x))
15515 		{
15516 		  putc ('[', file);
15517 		  ix86_print_operand (file, x, 0);
15518 		  putc (']', file);
15519 		  return;
15520 		}
15521 	      break;
15522 
15523 	    default:
15524 	      gcc_unreachable ();
15525 	    }
15526 
15527 	  ix86_print_operand (file, x, 0);
15528 	  return;
15529 
15530 	case 'E':
15531 	  /* Wrap address in an UNSPEC to declare special handling.  */
15532 	  if (TARGET_64BIT)
15533 	    x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15534 
15535 	  output_address (x);
15536 	  return;
15537 
15538 	case 'L':
15539 	  if (ASSEMBLER_DIALECT == ASM_ATT)
15540 	    putc ('l', file);
15541 	  return;
15542 
15543 	case 'W':
15544 	  if (ASSEMBLER_DIALECT == ASM_ATT)
15545 	    putc ('w', file);
15546 	  return;
15547 
15548 	case 'B':
15549 	  if (ASSEMBLER_DIALECT == ASM_ATT)
15550 	    putc ('b', file);
15551 	  return;
15552 
15553 	case 'Q':
15554 	  if (ASSEMBLER_DIALECT == ASM_ATT)
15555 	    putc ('l', file);
15556 	  return;
15557 
15558 	case 'S':
15559 	  if (ASSEMBLER_DIALECT == ASM_ATT)
15560 	    putc ('s', file);
15561 	  return;
15562 
15563 	case 'T':
15564 	  if (ASSEMBLER_DIALECT == ASM_ATT)
15565 	    putc ('t', file);
15566 	  return;
15567 
15568 	case 'O':
15569 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15570 	  if (ASSEMBLER_DIALECT != ASM_ATT)
15571 	    return;
15572 
15573 	  switch (GET_MODE_SIZE (GET_MODE (x)))
15574 	    {
15575 	    case 2:
15576 	      putc ('w', file);
15577 	      break;
15578 
15579 	    case 4:
15580 	      putc ('l', file);
15581 	      break;
15582 
15583 	    case 8:
15584 	      putc ('q', file);
15585 	      break;
15586 
15587 	    default:
15588 	      output_operand_lossage
15589 		("invalid operand size for operand code 'O'");
15590 	      return;
15591 	    }
15592 
15593 	  putc ('.', file);
15594 #endif
15595 	  return;
15596 
15597 	case 'z':
15598 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15599 	    {
15600 	      /* Opcodes don't get size suffixes in the Intel syntax.  */
15601 	      if (ASSEMBLER_DIALECT == ASM_INTEL)
15602 		return;
15603 
15604 	      switch (GET_MODE_SIZE (GET_MODE (x)))
15605 		{
15606 		case 1:
15607 		  putc ('b', file);
15608 		  return;
15609 
15610 		case 2:
15611 		  putc ('w', file);
15612 		  return;
15613 
15614 		case 4:
15615 		  putc ('l', file);
15616 		  return;
15617 
15618 		case 8:
15619 		  putc ('q', file);
15620 		  return;
15621 
15622 		default:
15623 		  output_operand_lossage
15624 		    ("invalid operand size for operand code 'z'");
15625 		  return;
15626 		}
15627 	    }
15628 
15629 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15630 	    warning
15631 	      (0, "non-integer operand used with operand code 'z'");
15632 	  /* FALLTHRU */
15633 
15634 	case 'Z':
15635 	  /* 387 opcodes don't get size suffixes in the Intel syntax.  */
15636 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
15637 	    return;
15638 
15639 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15640 	    {
15641 	      switch (GET_MODE_SIZE (GET_MODE (x)))
15642 		{
15643 		case 2:
15644 #ifdef HAVE_AS_IX86_FILDS
15645 		  putc ('s', file);
15646 #endif
15647 		  return;
15648 
15649 		case 4:
15650 		  putc ('l', file);
15651 		  return;
15652 
15653 		case 8:
15654 #ifdef HAVE_AS_IX86_FILDQ
15655 		  putc ('q', file);
15656 #else
15657 		  fputs ("ll", file);
15658 #endif
15659 		  return;
15660 
15661 		default:
15662 		  break;
15663 		}
15664 	    }
15665 	  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15666 	    {
15667 	      /* 387 opcodes don't get size suffixes
15668 		 if the operands are registers.  */
15669 	      if (STACK_REG_P (x))
15670 		return;
15671 
15672 	      switch (GET_MODE_SIZE (GET_MODE (x)))
15673 		{
15674 		case 4:
15675 		  putc ('s', file);
15676 		  return;
15677 
15678 		case 8:
15679 		  putc ('l', file);
15680 		  return;
15681 
15682 		case 12:
15683 		case 16:
15684 		  putc ('t', file);
15685 		  return;
15686 
15687 		default:
15688 		  break;
15689 		}
15690 	    }
15691 	  else
15692 	    {
15693 	      output_operand_lossage
15694 		("invalid operand type used with operand code 'Z'");
15695 	      return;
15696 	    }
15697 
15698 	  output_operand_lossage
15699 	    ("invalid operand size for operand code 'Z'");
15700 	  return;
15701 
15702 	case 'd':
15703 	case 'b':
15704 	case 'w':
15705 	case 'k':
15706 	case 'q':
15707 	case 'h':
15708 	case 't':
15709 	case 'g':
15710 	case 'y':
15711 	case 'x':
15712 	case 'X':
15713 	case 'P':
15714 	case 'p':
15715 	  break;
15716 
15717 	case 's':
15718 	  if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15719 	    {
15720 	      ix86_print_operand (file, x, 0);
15721 	      fputs (", ", file);
15722 	    }
15723 	  return;
15724 
15725 	case 'Y':
15726 	  switch (GET_CODE (x))
15727 	    {
15728 	    case NE:
15729 	      fputs ("neq", file);
15730 	      break;
15731 	    case EQ:
15732 	      fputs ("eq", file);
15733 	      break;
15734 	    case GE:
15735 	    case GEU:
15736 	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15737 	      break;
15738 	    case GT:
15739 	    case GTU:
15740 	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15741 	      break;
15742 	    case LE:
15743 	    case LEU:
15744 	      fputs ("le", file);
15745 	      break;
15746 	    case LT:
15747 	    case LTU:
15748 	      fputs ("lt", file);
15749 	      break;
15750 	    case UNORDERED:
15751 	      fputs ("unord", file);
15752 	      break;
15753 	    case ORDERED:
15754 	      fputs ("ord", file);
15755 	      break;
15756 	    case UNEQ:
15757 	      fputs ("ueq", file);
15758 	      break;
15759 	    case UNGE:
15760 	      fputs ("nlt", file);
15761 	      break;
15762 	    case UNGT:
15763 	      fputs ("nle", file);
15764 	      break;
15765 	    case UNLE:
15766 	      fputs ("ule", file);
15767 	      break;
15768 	    case UNLT:
15769 	      fputs ("ult", file);
15770 	      break;
15771 	    case LTGT:
15772 	      fputs ("une", file);
15773 	      break;
15774 	    default:
15775 	      output_operand_lossage ("operand is not a condition code, "
15776 				      "invalid operand code 'Y'");
15777 	      return;
15778 	    }
15779 	  return;
15780 
15781 	case 'D':
15782 	  /* A little bit of brain damage here.  The SSE compare instructions
15783 	     use completely different names for the comparisons than the
15784 	     fp conditional moves do.  */
15785 	  switch (GET_CODE (x))
15786 	    {
15787 	    case UNEQ:
15788 	      if (TARGET_AVX)
15789 		{
15790 		  fputs ("eq_us", file);
15791 		  break;
15792 		}
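	      /* FALLTHRU */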
15793 	    case EQ:
15794 	      fputs ("eq", file);
15795 	      break;
15796 	    case UNLT:
15797 	      if (TARGET_AVX)
15798 		{
15799 		  fputs ("nge", file);
15800 		  break;
15801 		}
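	      /* FALLTHRU */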
15802 	    case LT:
15803 	      fputs ("lt", file);
15804 	      break;
15805 	    case UNLE:
15806 	      if (TARGET_AVX)
15807 		{
15808 		  fputs ("ngt", file);
15809 		  break;
15810 		}
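	      /* FALLTHRU */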
15811 	    case LE:
15812 	      fputs ("le", file);
15813 	      break;
15814 	    case UNORDERED:
15815 	      fputs ("unord", file);
15816 	      break;
15817 	    case LTGT:
15818 	      if (TARGET_AVX)
15819 		{
15820 		  fputs ("neq_oq", file);
15821 		  break;
15822 		}
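	      /* FALLTHRU */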
15823 	    case NE:
15824 	      fputs ("neq", file);
15825 	      break;
15826 	    case GE:
15827 	      if (TARGET_AVX)
15828 		{
15829 		  fputs ("ge", file);
15830 		  break;
15831 		}
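	      /* FALLTHRU */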
15832 	    case UNGE:
15833 	      fputs ("nlt", file);
15834 	      break;
15835 	    case GT:
15836 	      if (TARGET_AVX)
15837 		{
15838 		  fputs ("gt", file);
15839 		  break;
15840 		}
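	      /* FALLTHRU */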
15841 	    case UNGT:
15842 	      fputs ("nle", file);
15843 	      break;
15844 	    case ORDERED:
15845 	      fputs ("ord", file);
15846 	      break;
15847 	    default:
15848 	      output_operand_lossage ("operand is not a condition code, "
15849 				      "invalid operand code 'D'");
15850 	      return;
15851 	    }
15852 	  return;
15853 
15854 	case 'F':
15855 	case 'f':
15856 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15857 	  if (ASSEMBLER_DIALECT == ASM_ATT)
15858 	    putc ('.', file);
15859 #endif
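	  /* FALLTHRU */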
15860 
15861 	case 'C':
15862 	case 'c':
15863 	  if (!COMPARISON_P (x))
15864 	    {
15865 	      output_operand_lossage ("operand is not a condition code, "
15866 				      "invalid operand code '%c'", code);
15867 	      return;
15868 	    }
15869 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15870 			      code == 'c' || code == 'f',
15871 			      code == 'F' || code == 'f',
15872 			      file);
15873 	  return;
15874 
15875 	case 'H':
15876 	  if (!offsettable_memref_p (x))
15877 	    {
15878 	      output_operand_lossage ("operand is not an offsettable memory "
15879 				      "reference, invalid operand code 'H'");
15880 	      return;
15881 	    }
15882 	  /* It doesn't actually matter what mode we use here, as we're
15883 	     only going to use this for printing.  */
15884 	  x = adjust_address_nv (x, DImode, 8);
15885 	  /* Output 'qword ptr' for intel assembler dialect.  */
15886 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
15887 	    code = 'q';
15888 	  break;
15889 
15890 	case 'K':
15891 	  gcc_assert (CONST_INT_P (x));
15892 
15893 	  if (INTVAL (x) & IX86_HLE_ACQUIRE)
15894 #ifdef HAVE_AS_IX86_HLE
15895 	    fputs ("xacquire ", file);
15896 #else
15897 	    fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15898 #endif
15899 	  else if (INTVAL (x) & IX86_HLE_RELEASE)
15900 #ifdef HAVE_AS_IX86_HLE
15901 	    fputs ("xrelease ", file);
15902 #else
15903 	    fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15904 #endif
15905 	  /* We do not want to print the value of the operand.  */
15906 	  return;
15907 
15908 	case 'N':
15909 	  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15910 	    fputs ("{z}", file);
15911 	  return;
15912 
15913 	case 'r':
15914 	  gcc_assert (CONST_INT_P (x));
15915 	  gcc_assert (INTVAL (x) == ROUND_SAE);
15916 
15917 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
15918 	    fputs (", ", file);
15919 
15920 	  fputs ("{sae}", file);
15921 
15922 	  if (ASSEMBLER_DIALECT == ASM_ATT)
15923 	    fputs (", ", file);
15924 
15925 	  return;
15926 
15927 	case 'R':
15928 	  gcc_assert (CONST_INT_P (x));
15929 
15930 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
15931 	    fputs (", ", file);
15932 
15933 	  switch (INTVAL (x))
15934 	    {
15935 	    case ROUND_NEAREST_INT | ROUND_SAE:
15936 	      fputs ("{rn-sae}", file);
15937 	      break;
15938 	    case ROUND_NEG_INF | ROUND_SAE:
15939 	      fputs ("{rd-sae}", file);
15940 	      break;
15941 	    case ROUND_POS_INF | ROUND_SAE:
15942 	      fputs ("{ru-sae}", file);
15943 	      break;
15944 	    case ROUND_ZERO | ROUND_SAE:
15945 	      fputs ("{rz-sae}", file);
15946 	      break;
15947 	    default:
15948 	      gcc_unreachable ();
15949 	    }
15950 
15951 	  if (ASSEMBLER_DIALECT == ASM_ATT)
15952 	    fputs (", ", file);
15953 
15954 	  return;
15955 
15956 	case '*':
15957 	  if (ASSEMBLER_DIALECT == ASM_ATT)
15958 	    putc ('*', file);
15959 	  return;
15960 
15961 	case '&':
15962 	  {
15963 	    const char *name = get_some_local_dynamic_name ();
15964 	    if (name == NULL)
15965 	      output_operand_lossage ("'%%&' used without any "
15966 				      "local dynamic TLS references");
15967 	    else
15968 	      assemble_name (file, name);
15969 	    return;
15970 	  }
15971 
15972 	case '+':
15973 	  {
15974 	    rtx x;
15975 
15976 	    if (!optimize
15977 	        || optimize_function_for_size_p (cfun)
15978 		|| !TARGET_BRANCH_PREDICTION_HINTS)
15979 	      return;
15980 
15981 	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15982 	    if (x)
15983 	      {
15984 		int pred_val = XINT (x, 0);
15985 
15986 		if (pred_val < REG_BR_PROB_BASE * 45 / 100
15987 		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
15988 		  {
15989 		    bool taken = pred_val > REG_BR_PROB_BASE / 2;
15990 		    bool cputaken
15991 		      = final_forward_branch_p (current_output_insn) == 0;
15992 
15993 		    /* Emit hints only in case the default branch prediction
15994 		       heuristics would fail.  */
15995 		    if (taken != cputaken)
15996 		      {
15997 			/* We use 3e (DS) prefix for taken branches and
15998 			   2e (CS) prefix for not taken branches.  */
15999 			if (taken)
16000 			  fputs ("ds ; ", file);
16001 			else
16002 			  fputs ("cs ; ", file);
16003 		      }
16004 		  }
16005 	      }
16006 	    return;
16007 	  }
16008 
16009 	case ';':
16010 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
16011 	  putc (';', file);
16012 #endif
16013 	  return;
16014 
16015 	case '@':
16016 	  if (ASSEMBLER_DIALECT == ASM_ATT)
16017 	    putc ('%', file);
16018 
16019 	  /* The kernel uses a different segment register for performance
16020 	     reasons: this way a system call does not have to trash the
16021 	     userspace segment register, which would be expensive.  */
16022 	  if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
16023 	    fputs ("fs", file);
16024 	  else
16025 	    fputs ("gs", file);
16026 	  return;
16027 
16028 	case '~':
16029 	  putc (TARGET_AVX2 ? 'i' : 'f', file);
16030 	  return;
16031 
16032 	case '^':
16033 	  if (TARGET_64BIT && Pmode != word_mode)
16034 	    fputs ("addr32 ", file);
16035 	  return;
16036 
16037 	case '!':
16038 	  if (ix86_bnd_prefixed_insn_p (current_output_insn))
16039 	    fputs ("bnd ", file);
16040 	  return;
16041 
16042 	default:
16043 	    output_operand_lossage ("invalid operand code '%c'", code);
16044 	}
16045     }
16046 
16047   if (REG_P (x))
16048     print_reg (x, code, file);
16049 
16050   else if (MEM_P (x))
16051     {
16052       /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
16053       if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
16054 	  && GET_MODE (x) != BLKmode)
16055 	{
16056 	  const char * size;
16057 	  switch (GET_MODE_SIZE (GET_MODE (x)))
16058 	    {
16059 	    case 1: size = "BYTE"; break;
16060 	    case 2: size = "WORD"; break;
16061 	    case 4: size = "DWORD"; break;
16062 	    case 8: size = "QWORD"; break;
16063 	    case 12: size = "TBYTE"; break;
16064 	    case 16:
16065 	      if (GET_MODE (x) == XFmode)
16066 		size = "TBYTE";
16067               else
16068 		size = "XMMWORD";
16069               break;
16070 	    case 32: size = "YMMWORD"; break;
16071 	    case 64: size = "ZMMWORD"; break;
16072 	    default:
16073 	      gcc_unreachable ();
16074 	    }
16075 
16076 	  /* Check for explicit size override (codes 'b', 'w', 'k',
16077 	     'q' and 'x')  */
16078 	  if (code == 'b')
16079 	    size = "BYTE";
16080 	  else if (code == 'w')
16081 	    size = "WORD";
16082 	  else if (code == 'k')
16083 	    size = "DWORD";
16084 	  else if (code == 'q')
16085 	    size = "QWORD";
16086 	  else if (code == 'x')
16087 	    size = "XMMWORD";
16088 
16089 	  fputs (size, file);
16090 	  fputs (" PTR ", file);
16091 	}
16092 
16093       x = XEXP (x, 0);
16094       /* Avoid (%rip) for call operands.  */
16095       if (CONSTANT_ADDRESS_P (x) && code == 'P'
16096 	  && !CONST_INT_P (x))
16097 	output_addr_const (file, x);
16098       else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
16099 	output_operand_lossage ("invalid constraints for operand");
16100       else
16101 	output_address (x);
16102     }
16103 
16104   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
16105     {
16106       REAL_VALUE_TYPE r;
16107       long l;
16108 
16109       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16110       REAL_VALUE_TO_TARGET_SINGLE (r, l);
16111 
16112       if (ASSEMBLER_DIALECT == ASM_ATT)
16113 	putc ('$', file);
16114       /* Sign extend 32bit SFmode immediate to 8 bytes.  */
16115       if (code == 'q')
16116 	fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
16117 		 (unsigned long long) (int) l);
16118       else
16119 	fprintf (file, "0x%08x", (unsigned int) l);
16120     }
16121 
16122   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
16123     {
16124       REAL_VALUE_TYPE r;
16125       long l[2];
16126 
16127       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16128       REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16129 
16130       if (ASSEMBLER_DIALECT == ASM_ATT)
16131 	putc ('$', file);
16132       fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
16133     }
16134 
16135   /* These float cases don't actually occur as immediate operands.  */
16136   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
16137     {
16138       char dstr[30];
16139 
16140       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
16141       fputs (dstr, file);
16142     }
16143 
16144   else
16145     {
16146       /* We have patterns that allow zero sets of memory, for instance.
16147 	 In 64-bit mode, we should probably support all 8-byte vectors,
16148 	 since we can in fact encode that into an immediate.  */
16149       if (GET_CODE (x) == CONST_VECTOR)
16150 	{
16151 	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
16152 	  x = const0_rtx;
16153 	}
16154 
16155       if (code != 'P' && code != 'p')
16156 	{
16157 	  if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
16158 	    {
16159 	      if (ASSEMBLER_DIALECT == ASM_ATT)
16160 		putc ('$', file);
16161 	    }
16162 	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
16163 		   || GET_CODE (x) == LABEL_REF)
16164 	    {
16165 	      if (ASSEMBLER_DIALECT == ASM_ATT)
16166 		putc ('$', file);
16167 	      else
16168 		fputs ("OFFSET FLAT:", file);
16169 	    }
16170 	}
16171       if (CONST_INT_P (x))
16172 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16173       else if (flag_pic || MACHOPIC_INDIRECT)
16174 	output_pic_addr_const (file, x, code);
16175       else
16176 	output_addr_const (file, x);
16177     }
16178 }
16179 
16180 static bool
16181 ix86_print_operand_punct_valid_p (unsigned char code)
16182 {
16183   return (code == '@' || code == '*' || code == '+' || code == '&'
16184 	  || code == ';' || code == '~' || code == '^' || code == '!');
16185 }
16186 
16187 /* Print a memory operand whose address is ADDR.  */
16188 
16189 static void
16190 ix86_print_operand_address (FILE *file, rtx addr)
16191 {
16192   struct ix86_address parts;
16193   rtx base, index, disp;
16194   int scale;
16195   int ok;
16196   bool vsib = false;
16197   int code = 0;
16198 
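  /* Addresses wrapped in an UNSPEC carry extra information -- a VSIB
     vector index, a 64-bit LEA operand, or an MPX bound operand.
     Unwrap them into PARTS before printing.  */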
16199   if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
16200     {
16201       ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16202       gcc_assert (parts.index == NULL_RTX);
16203       parts.index = XVECEXP (addr, 0, 1);
16204       parts.scale = INTVAL (XVECEXP (addr, 0, 2));
16205       addr = XVECEXP (addr, 0, 0);
16206       vsib = true;
16207     }
16208   else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
16209     {
16210       gcc_assert (TARGET_64BIT);
16211       ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16212       code = 'q';
16213     }
16214   else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
16215     {
16216       ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
16217       gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
16218       if (parts.base != NULL_RTX)
16219 	{
16220 	  parts.index = parts.base;
16221 	  parts.scale = 1;
16222 	}
16223       parts.base = XVECEXP (addr, 0, 0);
16224       addr = XVECEXP (addr, 0, 0);
16225     }
16226   else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
16227     {
16228       ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16229       gcc_assert (parts.index == NULL_RTX);
16230       parts.index = XVECEXP (addr, 0, 1);
16231       addr = XVECEXP (addr, 0, 0);
16232     }
16233   else
16234     ok = ix86_decompose_address (addr, &parts);
16235 
16236   gcc_assert (ok);
16237 
16238   base = parts.base;
16239   index = parts.index;
16240   disp = parts.disp;
16241   scale = parts.scale;
16242 
16243   switch (parts.seg)
16244     {
16245     case SEG_DEFAULT:
16246       break;
16247     case SEG_FS:
16248     case SEG_GS:
16249       if (ASSEMBLER_DIALECT == ASM_ATT)
16250 	putc ('%', file);
16251       fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16252       break;
16253     default:
16254       gcc_unreachable ();
16255     }
16256 
16257   /* Use one byte shorter RIP relative addressing for 64bit mode.  */
16258   if (TARGET_64BIT && !base && !index)
16259     {
16260       rtx symbol = disp;
16261 
16262       if (GET_CODE (disp) == CONST
16263 	  && GET_CODE (XEXP (disp, 0)) == PLUS
16264 	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16265 	symbol = XEXP (XEXP (disp, 0), 0);
16266 
16267       if (GET_CODE (symbol) == LABEL_REF
16268 	  || (GET_CODE (symbol) == SYMBOL_REF
16269 	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16270 	base = pc_rtx;
16271     }
16272   if (!base && !index)
16273     {
16274       /* A displacement-only address requires special attention.  */
16275 
16276       if (CONST_INT_P (disp))
16277 	{
16278 	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16279 	    fputs ("ds:", file);
16280 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16281 	}
16282       else if (flag_pic)
16283 	output_pic_addr_const (file, disp, 0);
16284       else
16285 	output_addr_const (file, disp);
16286     }
16287   else
16288     {
16289       /* Print SImode register names to force the addr32 prefix.  */
16290       if (SImode_address_operand (addr, VOIDmode))
16291 	{
16292 #ifdef ENABLE_CHECKING
16293 	  gcc_assert (TARGET_64BIT);
16294 	  switch (GET_CODE (addr))
16295 	    {
16296 	    case SUBREG:
16297 	      gcc_assert (GET_MODE (addr) == SImode);
16298 	      gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16299 	      break;
16300 	    case ZERO_EXTEND:
16301 	    case AND:
16302 	      gcc_assert (GET_MODE (addr) == DImode);
16303 	      break;
16304 	    default:
16305 	      gcc_unreachable ();
16306 	    }
16307 #endif
16308 	  gcc_assert (!code);
16309 	  code = 'k';
16310 	}
16311       else if (code == 0
16312 	       && TARGET_X32
16313 	       && disp
16314 	       && CONST_INT_P (disp)
16315 	       && INTVAL (disp) < -16*1024*1024)
16316 	{
16317 	  /* X32 runs in 64-bit mode, where displacement, DISP, in
16318 	     address DISP(%r64), is encoded as 32-bit immediate sign-
16319 	     extended from 32-bit to 64-bit.  For -0x40000300(%r64),
16320 	     address is %r64 + 0xffffffffbffffd00.  When %r64 <
16321 	     0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16322 	     which is invalid for x32.  The correct address is %r64
16323 	     - 0x40000300 == 0xf7ffdd64.  To properly encode
16324 	     -0x40000300(%r64) for x32, we zero-extend negative
16325 	     displacement by forcing addr32 prefix which truncates
16326 	     0xfffffffff7ffdd64 to 0xf7ffdd64.  In theory, we should
16327 	     zero-extend all negative displacements, including -1(%rsp).
16328 	     However, for small negative displacements, sign-extension
16329 	     won't cause overflow.  We only zero-extend negative
16330 	     displacements if they are < -16*1024*1024, which is also the
16331 	     limit used to check legitimate address displacements for PIC.  */
16332 	  code = 'k';
16333 	}
16334 
16335       if (ASSEMBLER_DIALECT == ASM_ATT)
16336 	{
16337 	  if (disp)
16338 	    {
16339 	      if (flag_pic)
16340 		output_pic_addr_const (file, disp, 0);
16341 	      else if (GET_CODE (disp) == LABEL_REF)
16342 		output_asm_label (disp);
16343 	      else
16344 		output_addr_const (file, disp);
16345 	    }
16346 
16347 	  putc ('(', file);
16348 	  if (base)
16349 	    print_reg (base, code, file);
16350 	  if (index)
16351 	    {
16352 	      putc (',', file);
16353 	      print_reg (index, vsib ? 0 : code, file);
16354 	      if (scale != 1 || vsib)
16355 		fprintf (file, ",%d", scale);
16356 	    }
16357 	  putc (')', file);
16358 	}
16359       else
16360 	{
16361 	  rtx offset = NULL_RTX;
16362 
16363 	  if (disp)
16364 	    {
16365 	      /* Pull out the offset of a symbol; print any symbol itself.  */
16366 	      if (GET_CODE (disp) == CONST
16367 		  && GET_CODE (XEXP (disp, 0)) == PLUS
16368 		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16369 		{
16370 		  offset = XEXP (XEXP (disp, 0), 1);
16371 		  disp = gen_rtx_CONST (VOIDmode,
16372 					XEXP (XEXP (disp, 0), 0));
16373 		}
16374 
16375 	      if (flag_pic)
16376 		output_pic_addr_const (file, disp, 0);
16377 	      else if (GET_CODE (disp) == LABEL_REF)
16378 		output_asm_label (disp);
16379 	      else if (CONST_INT_P (disp))
16380 		offset = disp;
16381 	      else
16382 		output_addr_const (file, disp);
16383 	    }
16384 
16385 	  putc ('[', file);
16386 	  if (base)
16387 	    {
16388 	      print_reg (base, code, file);
16389 	      if (offset)
16390 		{
16391 		  if (INTVAL (offset) >= 0)
16392 		    putc ('+', file);
16393 		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16394 		}
16395 	    }
16396 	  else if (offset)
16397 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16398 	  else
16399 	    putc ('0', file);
16400 
16401 	  if (index)
16402 	    {
16403 	      putc ('+', file);
16404 	      print_reg (index, vsib ? 0 : code, file);
16405 	      if (scale != 1 || vsib)
16406 		fprintf (file, "*%d", scale);
16407 	    }
16408 	  putc (']', file);
16409 	}
16410     }
16411 }
16412 
16413 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */
16414 
16415 static bool
16416 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16417 {
16418   rtx op;
16419 
16420   if (GET_CODE (x) != UNSPEC)
16421     return false;
16422 
16423   op = XVECEXP (x, 0, 0);
16424   switch (XINT (x, 1))
16425     {
16426     case UNSPEC_GOTTPOFF:
16427       output_addr_const (file, op);
16428       /* FIXME: This might be @TPOFF in Sun ld.  */
16429       fputs ("@gottpoff", file);
16430       break;
16431     case UNSPEC_TPOFF:
16432       output_addr_const (file, op);
16433       fputs ("@tpoff", file);
16434       break;
16435     case UNSPEC_NTPOFF:
16436       output_addr_const (file, op);
16437       if (TARGET_64BIT)
16438 	fputs ("@tpoff", file);
16439       else
16440 	fputs ("@ntpoff", file);
16441       break;
16442     case UNSPEC_DTPOFF:
16443       output_addr_const (file, op);
16444       fputs ("@dtpoff", file);
16445       break;
16446     case UNSPEC_GOTNTPOFF:
16447       output_addr_const (file, op);
16448       if (TARGET_64BIT)
16449 	fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16450 	       "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16451       else
16452 	fputs ("@gotntpoff", file);
16453       break;
16454     case UNSPEC_INDNTPOFF:
16455       output_addr_const (file, op);
16456       fputs ("@indntpoff", file);
16457       break;
16458 #if TARGET_MACHO
16459     case UNSPEC_MACHOPIC_OFFSET:
16460       output_addr_const (file, op);
16461       putc ('-', file);
16462       machopic_output_function_base_name (file);
16463       break;
16464 #endif
16465 
16466     case UNSPEC_STACK_CHECK:
16467       {
16468 	int offset;
16469 
16470 	gcc_assert (flag_split_stack);
16471 
16472 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16473 	offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16474 #else
16475 	gcc_unreachable ();
16476 #endif
16477 
16478 	fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16479       }
16480       break;
16481 
16482     default:
16483       return false;
16484     }
16485 
16486   return true;
16487 }
16488 
16489 /* Split one or more double-mode RTL references into pairs of half-mode
16490    references.  The RTL can be REG, offsettable MEM, integer constant, or
16491    CONST_DOUBLE.  "operands" is a pointer to an array of double-mode RTLs to
16492    split and "num" is its length.  lo_half and hi_half are output arrays
16493    that parallel "operands".  */
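
/* For example, a caller splitting a single DImode operand into SImode
   halves might do (a minimal sketch):

     rtx lo_half[1], hi_half[1];
     split_double_mode (DImode, &operands[0], 1, lo_half, hi_half);

   after which lo_half[0] holds the low SImode word and hi_half[0] the
   high one.  */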
16494 
16495 void
16496 split_double_mode (machine_mode mode, rtx operands[],
16497 		   int num, rtx lo_half[], rtx hi_half[])
16498 {
16499   machine_mode half_mode;
16500   unsigned int byte;
16501 
16502   switch (mode)
16503     {
16504     case TImode:
16505       half_mode = DImode;
16506       break;
16507     case DImode:
16508       half_mode = SImode;
16509       break;
16510     default:
16511       gcc_unreachable ();
16512     }
16513 
16514   byte = GET_MODE_SIZE (half_mode);
16515 
16516   while (num--)
16517     {
16518       rtx op = operands[num];
16519 
16520       /* simplify_subreg refuses to split volatile memory addresses,
16521          but we still have to handle them.  */
16522       if (MEM_P (op))
16523 	{
16524 	  lo_half[num] = adjust_address (op, half_mode, 0);
16525 	  hi_half[num] = adjust_address (op, half_mode, byte);
16526 	}
16527       else
16528 	{
16529 	  lo_half[num] = simplify_gen_subreg (half_mode, op,
16530 					      GET_MODE (op) == VOIDmode
16531 					      ? mode : GET_MODE (op), 0);
16532 	  hi_half[num] = simplify_gen_subreg (half_mode, op,
16533 					      GET_MODE (op) == VOIDmode
16534 					      ? mode : GET_MODE (op), byte);
16535 	}
16536     }
16537 }
16538 
16539 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16540    MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
16541    is the expression of the binary operation.  The output may either be
16542    emitted here, or returned to the caller, like all output_* functions.
16543 
16544    There is no guarantee that the operands are the same mode, as they
16545    might be within FLOAT or FLOAT_EXTEND expressions.  */
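
/* For instance, an SFmode add with operands[0] == operands[1] == st(0)
   and operands[2] a live stack register typically yields
   "fadd\t{%y2, %0|%0, %y2}", while the AVX path below yields
   "vaddss\t{%2, %1, %0|%0, %1, %2}".  */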
16546 
16547 #ifndef SYSV386_COMPAT
16548 /* Set to 1 for compatibility with brain-damaged assemblers.  No-one
16549    wants to fix the assemblers because that causes incompatibility
16550    with gcc.  No-one wants to fix gcc because that causes
16551    incompatibility with assemblers...  You can use the option of
16552    -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
16553 #define SYSV386_COMPAT 1
16554 #endif
16555 
16556 const char *
16557 output_387_binary_op (rtx insn, rtx *operands)
16558 {
16559   static char buf[40];
16560   const char *p;
16561   const char *ssep;
16562   int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16563 
16564 #ifdef ENABLE_CHECKING
16565   /* Even if we do not want to check the inputs, this documents the input
16566      constraints, which helps in understanding the following code.  */
16567   if (STACK_REG_P (operands[0])
16568       && ((REG_P (operands[1])
16569 	   && REGNO (operands[0]) == REGNO (operands[1])
16570 	   && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16571 	  || (REG_P (operands[2])
16572 	      && REGNO (operands[0]) == REGNO (operands[2])
16573 	      && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16574       && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16575     ; /* ok */
16576   else
16577     gcc_assert (is_sse);
16578 #endif
16579 
16580   switch (GET_CODE (operands[3]))
16581     {
16582     case PLUS:
16583       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16584 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16585 	p = "fiadd";
16586       else
16587 	p = "fadd";
16588       ssep = "vadd";
16589       break;
16590 
16591     case MINUS:
16592       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16593 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16594 	p = "fisub";
16595       else
16596 	p = "fsub";
16597       ssep = "vsub";
16598       break;
16599 
16600     case MULT:
16601       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16602 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16603 	p = "fimul";
16604       else
16605 	p = "fmul";
16606       ssep = "vmul";
16607       break;
16608 
16609     case DIV:
16610       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16611 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16612 	p = "fidiv";
16613       else
16614 	p = "fdiv";
16615       ssep = "vdiv";
16616       break;
16617 
16618     default:
16619       gcc_unreachable ();
16620     }
16621 
16622   if (is_sse)
16623    {
16624      if (TARGET_AVX)
16625        {
16626 	 strcpy (buf, ssep);
16627 	 if (GET_MODE (operands[0]) == SFmode)
16628 	   strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16629 	 else
16630 	   strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16631        }
16632      else
16633        {
16634 	 strcpy (buf, ssep + 1);
16635 	 if (GET_MODE (operands[0]) == SFmode)
16636 	   strcat (buf, "ss\t{%2, %0|%0, %2}");
16637 	 else
16638 	   strcat (buf, "sd\t{%2, %0|%0, %2}");
16639        }
16640       return buf;
16641    }
16642   strcpy (buf, p);
16643 
16644   switch (GET_CODE (operands[3]))
16645     {
16646     case MULT:
16647     case PLUS:
16648       if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16649 	std::swap (operands[1], operands[2]);
16650 
16651       /* Now we know operands[0] == operands[1].  */
16652 
16653       if (MEM_P (operands[2]))
16654 	{
16655 	  p = "%Z2\t%2";
16656 	  break;
16657 	}
16658 
16659       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16660 	{
16661 	  if (STACK_TOP_P (operands[0]))
16662 	    /* How is it that we are storing to a dead operand[2]?
16663 	       Well, presumably operands[1] is dead too.  We can't
16664 	       store the result to st(0) as st(0) gets popped on this
16665 	       instruction.  Instead store to operands[2] (which I
16666 	       think has to be st(1)).  st(1) will be popped later.
16667 	       gcc <= 2.8.1 didn't have this check and generated
16668 	       assembly code that the Unixware assembler rejected.  */
16669 	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
16670 	  else
16671 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
16672 	  break;
16673 	}
16674 
16675       if (STACK_TOP_P (operands[0]))
16676 	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
16677       else
16678 	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
16679       break;
16680 
16681     case MINUS:
16682     case DIV:
16683       if (MEM_P (operands[1]))
16684 	{
16685 	  p = "r%Z1\t%1";
16686 	  break;
16687 	}
16688 
16689       if (MEM_P (operands[2]))
16690 	{
16691 	  p = "%Z2\t%2";
16692 	  break;
16693 	}
16694 
16695       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16696 	{
16697 #if SYSV386_COMPAT
16698 	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16699 	     derived assemblers, confusingly reverse the direction of
16700 	     the operation for fsub{r} and fdiv{r} when the
16701 	     destination register is not st(0).  The Intel assembler
16702 	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
16703 	     figure out what the hardware really does.  */
16704 	  if (STACK_TOP_P (operands[0]))
16705 	    p = "{p\t%0, %2|rp\t%2, %0}";
16706 	  else
16707 	    p = "{rp\t%2, %0|p\t%0, %2}";
16708 #else
16709 	  if (STACK_TOP_P (operands[0]))
16710 	    /* As above for fmul/fadd, we can't store to st(0).  */
16711 	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
16712 	  else
16713 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
16714 #endif
16715 	  break;
16716 	}
16717 
16718       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16719 	{
16720 #if SYSV386_COMPAT
16721 	  if (STACK_TOP_P (operands[0]))
16722 	    p = "{rp\t%0, %1|p\t%1, %0}";
16723 	  else
16724 	    p = "{p\t%1, %0|rp\t%0, %1}";
16725 #else
16726 	  if (STACK_TOP_P (operands[0]))
16727 	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
16728 	  else
16729 	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
16730 #endif
16731 	  break;
16732 	}
16733 
16734       if (STACK_TOP_P (operands[0]))
16735 	{
16736 	  if (STACK_TOP_P (operands[1]))
16737 	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
16738 	  else
16739 	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
16740 	  break;
16741 	}
16742       else if (STACK_TOP_P (operands[1]))
16743 	{
16744 #if SYSV386_COMPAT
16745 	  p = "{\t%1, %0|r\t%0, %1}";
16746 #else
16747 	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
16748 #endif
16749 	}
16750       else
16751 	{
16752 #if SYSV386_COMPAT
16753 	  p = "{r\t%2, %0|\t%0, %2}";
16754 #else
16755 	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
16756 #endif
16757 	}
16758       break;
16759 
16760     default:
16761       gcc_unreachable ();
16762     }
16763 
16764   strcat (buf, p);
16765   return buf;
16766 }
16767 
16768 /* Check if a 256bit AVX register is referenced inside EXP.  */
16769 
16770 static bool
16771 ix86_check_avx256_register (const_rtx exp)
16772 {
16773   if (GET_CODE (exp) == SUBREG)
16774     exp = SUBREG_REG (exp);
16775 
16776   return (REG_P (exp)
16777 	  && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16778 }
16779 
16780 /* Return needed mode for entity in optimize_mode_switching pass.  */
16781 
16782 static int
16783 ix86_avx_u128_mode_needed (rtx_insn *insn)
16784 {
16785   if (CALL_P (insn))
16786     {
16787       rtx link;
16788 
16789       /* Needed mode is set to AVX_U128_CLEAN if there are
16790 	 no 256bit modes used in function arguments.  */
16791       for (link = CALL_INSN_FUNCTION_USAGE (insn);
16792 	   link;
16793 	   link = XEXP (link, 1))
16794 	{
16795 	  if (GET_CODE (XEXP (link, 0)) == USE)
16796 	    {
16797 	      rtx arg = XEXP (XEXP (link, 0), 0);
16798 
16799 	      if (ix86_check_avx256_register (arg))
16800 		return AVX_U128_DIRTY;
16801 	    }
16802 	}
16803 
16804       return AVX_U128_CLEAN;
16805     }
16806 
16807   /* Require DIRTY mode if a 256bit AVX register is referenced.  Hardware
16808      changes state only when a 256bit register is written to, but we need
16809      to prevent the compiler from moving the optimal insertion point above
16810      an eventual read from a 256bit register.  */
16811   subrtx_iterator::array_type array;
16812   FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16813     if (ix86_check_avx256_register (*iter))
16814       return AVX_U128_DIRTY;
16815 
16816   return AVX_U128_ANY;
16817 }
16818 
16819 /* Return mode that i387 must be switched into
16820    prior to the execution of insn.  */
16821 
16822 static int
16823 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16824 {
16825   enum attr_i387_cw mode;
16826 
16827   /* The mode UNINITIALIZED is used to store the control word after a
16828      function call or ASM pattern.  The mode ANY specifies that the function
16829      has no requirements on the control word and makes no changes in the
16830      bits we are interested in.  */
16831 
16832   if (CALL_P (insn)
16833       || (NONJUMP_INSN_P (insn)
16834 	  && (asm_noperands (PATTERN (insn)) >= 0
16835 	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16836     return I387_CW_UNINITIALIZED;
16837 
16838   if (recog_memoized (insn) < 0)
16839     return I387_CW_ANY;
16840 
16841   mode = get_attr_i387_cw (insn);
16842 
16843   switch (entity)
16844     {
16845     case I387_TRUNC:
16846       if (mode == I387_CW_TRUNC)
16847 	return mode;
16848       break;
16849 
16850     case I387_FLOOR:
16851       if (mode == I387_CW_FLOOR)
16852 	return mode;
16853       break;
16854 
16855     case I387_CEIL:
16856       if (mode == I387_CW_CEIL)
16857 	return mode;
16858       break;
16859 
16860     case I387_MASK_PM:
16861       if (mode == I387_CW_MASK_PM)
16862 	return mode;
16863       break;
16864 
16865     default:
16866       gcc_unreachable ();
16867     }
16868 
16869   return I387_CW_ANY;
16870 }
16871 
16872 /* Return mode that entity must be switched into
16873    prior to the execution of insn.  */
16874 
16875 static int
16876 ix86_mode_needed (int entity, rtx_insn *insn)
16877 {
16878   switch (entity)
16879     {
16880     case AVX_U128:
16881       return ix86_avx_u128_mode_needed (insn);
16882     case I387_TRUNC:
16883     case I387_FLOOR:
16884     case I387_CEIL:
16885     case I387_MASK_PM:
16886       return ix86_i387_mode_needed (entity, insn);
16887     default:
16888       gcc_unreachable ();
16889     }
16890   return 0;
16891 }
16892 
16893 /* Check if a 256bit AVX register is referenced in stores.   */
16894 
16895 static void
16896 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16897 {
16898   if (ix86_check_avx256_register (dest))
16899     {
16900       bool *used = (bool *) data;
16901       *used = true;
16902     }
16903 }
16904 
16905 /* Calculate mode of upper 128bit AVX registers after the insn.  */
16906 
16907 static int
16908 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16909 {
16910   rtx pat = PATTERN (insn);
16911 
16912   if (vzeroupper_operation (pat, VOIDmode)
16913       || vzeroall_operation (pat, VOIDmode))
16914     return AVX_U128_CLEAN;
16915 
16916   /* We know that the state is clean after a CALL insn if no 256bit
16917      register is used in the function return register.  */
16918   if (CALL_P (insn))
16919     {
16920       bool avx_reg256_found = false;
16921       note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16922 
16923       return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16924     }
16925 
16926   /* Otherwise, return current mode.  Remember that if insn
16927      references AVX 256bit registers, the mode was already changed
16928      to DIRTY from MODE_NEEDED.  */
16929   return mode;
16930 }
16931 
16932 /* Return the mode that an insn results in.  */
16933 
16934 static int
16935 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16936 {
16937   switch (entity)
16938     {
16939     case AVX_U128:
16940       return ix86_avx_u128_mode_after (mode, insn);
16941     case I387_TRUNC:
16942     case I387_FLOOR:
16943     case I387_CEIL:
16944     case I387_MASK_PM:
16945       return mode;
16946     default:
16947       gcc_unreachable ();
16948     }
16949 }
16950 
16951 static int
16952 ix86_avx_u128_mode_entry (void)
16953 {
16954   tree arg;
16955 
16956   /* Entry mode is set to AVX_U128_DIRTY if there are
16957      256bit modes used in function arguments.  */
16958   for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16959        arg = TREE_CHAIN (arg))
16960     {
16961       rtx incoming = DECL_INCOMING_RTL (arg);
16962 
16963       if (incoming && ix86_check_avx256_register (incoming))
16964 	return AVX_U128_DIRTY;
16965     }
16966 
16967   return AVX_U128_CLEAN;
16968 }
16969 
16970 /* Return a mode that ENTITY is assumed to be
16971    switched to at function entry.  */
16972 
16973 static int
16974 ix86_mode_entry (int entity)
16975 {
16976   switch (entity)
16977     {
16978     case AVX_U128:
16979       return ix86_avx_u128_mode_entry ();
16980     case I387_TRUNC:
16981     case I387_FLOOR:
16982     case I387_CEIL:
16983     case I387_MASK_PM:
16984       return I387_CW_ANY;
16985     default:
16986       gcc_unreachable ();
16987     }
16988 }
16989 
16990 static int
16991 ix86_avx_u128_mode_exit (void)
16992 {
16993   rtx reg = crtl->return_rtx;
16994 
16995   /* Exit mode is set to AVX_U128_DIRTY if there are
16996      256bit modes used in the function return register.  */
16997   if (reg && ix86_check_avx256_register (reg))
16998     return AVX_U128_DIRTY;
16999 
17000   return AVX_U128_CLEAN;
17001 }
17002 
17003 /* Return a mode that ENTITY is assumed to be
17004    switched to at function exit.  */
17005 
17006 static int
17007 ix86_mode_exit (int entity)
17008 {
17009   switch (entity)
17010     {
17011     case AVX_U128:
17012       return ix86_avx_u128_mode_exit ();
17013     case I387_TRUNC:
17014     case I387_FLOOR:
17015     case I387_CEIL:
17016     case I387_MASK_PM:
17017       return I387_CW_ANY;
17018     default:
17019       gcc_unreachable ();
17020     }
17021 }
17022 
17023 static int
17024 ix86_mode_priority (int, int n)
17025 {
17026   return n;
17027 }
17028 
17029 /* Output code to initialize the control word copies used by the
17030    trunc?f?i and rounding patterns.  The stored control word is copied
17031    into a scratch register, modified for MODE and saved to a stack slot.  */
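
/* The x87 rounding-control field occupies bits 10-11 of the control
   word: 00 rounds to nearest, 01 rounds down, 10 rounds up and 11
   truncates; bit 5 masks the precision exception.  Hence the 0x0c00,
   0x0400, 0x0800 and 0x0020 constants below.  */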
17032 
17033 static void
17034 emit_i387_cw_initialization (int mode)
17035 {
17036   rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
17037   rtx new_mode;
17038 
17039   enum ix86_stack_slot slot;
17040 
17041   rtx reg = gen_reg_rtx (HImode);
17042 
17043   emit_insn (gen_x86_fnstcw_1 (stored_mode));
17044   emit_move_insn (reg, copy_rtx (stored_mode));
17045 
17046   if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
17047       || optimize_insn_for_size_p ())
17048     {
17049       switch (mode)
17050 	{
17051 	case I387_CW_TRUNC:
17052 	  /* round toward zero (truncate) */
17053 	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
17054 	  slot = SLOT_CW_TRUNC;
17055 	  break;
17056 
17057 	case I387_CW_FLOOR:
17058 	  /* round down toward -oo */
17059 	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
17060 	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
17061 	  slot = SLOT_CW_FLOOR;
17062 	  break;
17063 
17064 	case I387_CW_CEIL:
17065 	  /* round up toward +oo */
17066 	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
17067 	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
17068 	  slot = SLOT_CW_CEIL;
17069 	  break;
17070 
17071 	case I387_CW_MASK_PM:
17072 	  /* mask precision exception for nearbyint() */
17073 	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
17074 	  slot = SLOT_CW_MASK_PM;
17075 	  break;
17076 
17077 	default:
17078 	  gcc_unreachable ();
17079 	}
17080     }
17081   else
17082     {
17083       switch (mode)
17084 	{
17085 	case I387_CW_TRUNC:
17086 	  /* round toward zero (truncate) */
17087 	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
17088 	  slot = SLOT_CW_TRUNC;
17089 	  break;
17090 
17091 	case I387_CW_FLOOR:
17092 	  /* round down toward -oo */
17093 	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
17094 	  slot = SLOT_CW_FLOOR;
17095 	  break;
17096 
17097 	case I387_CW_CEIL:
17098 	  /* round up toward +oo */
17099 	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
17100 	  slot = SLOT_CW_CEIL;
17101 	  break;
17102 
17103 	case I387_CW_MASK_PM:
17104 	  /* mask precision exception for nearbyint() */
17105 	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
17106 	  slot = SLOT_CW_MASK_PM;
17107 	  break;
17108 
17109 	default:
17110 	  gcc_unreachable ();
17111 	}
17112     }
17113 
17114   gcc_assert (slot < MAX_386_STACK_LOCALS);
17115 
17116   new_mode = assign_386_stack_local (HImode, slot);
17117   emit_move_insn (new_mode, reg);
17118 }
17119 
17120 /* Emit vzeroupper.  */
17121 
17122 void
17123 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
17124 {
17125   int i;
17126 
17127   /* Cancel automatic vzeroupper insertion if there are
17128      live call-saved SSE registers at the insertion point.  */
17129 
17130   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17131     if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17132       return;
17133 
17134   if (TARGET_64BIT)
17135     for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17136       if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17137 	return;
17138 
17139   emit_insn (gen_avx_vzeroupper ());
17140 }
17141 
17144 /* Generate one or more insns to set ENTITY to MODE.  REGS_LIVE
17145    is the set of hard registers live at the point where the insn(s)
17146    are to be inserted.  */
17147 
17148 static void
17149 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
17150 		    HARD_REG_SET regs_live)
17151 {
17152   switch (entity)
17153     {
17154     case AVX_U128:
17155       if (mode == AVX_U128_CLEAN)
17156 	ix86_avx_emit_vzeroupper (regs_live);
17157       break;
17158     case I387_TRUNC:
17159     case I387_FLOOR:
17160     case I387_CEIL:
17161     case I387_MASK_PM:
17162       if (mode != I387_CW_ANY
17163 	  && mode != I387_CW_UNINITIALIZED)
17164 	emit_i387_cw_initialization (mode);
17165       break;
17166     default:
17167       gcc_unreachable ();
17168     }
17169 }
17170 
17171 /* Output code for INSN to convert a float to a signed int.  OPERANDS
17172    are the insn operands.  The output may be [HSD]Imode and the input
17173    operand may be [SDX]Fmode.  */
17174 
17175 const char *
17176 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
17177 {
17178   int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17179   int dimode_p = GET_MODE (operands[0]) == DImode;
17180   int round_mode = get_attr_i387_cw (insn);
17181 
17182   /* Jump through a hoop or two for DImode, since the hardware has no
17183      non-popping instruction.  We used to do this a different way, but
17184      that was somewhat fragile and broke with post-reload splitters.  */
17185   if ((dimode_p || fisttp) && !stack_top_dies)
17186     output_asm_insn ("fld\t%y1", operands);
17187 
17188   gcc_assert (STACK_TOP_P (operands[1]));
17189   gcc_assert (MEM_P (operands[0]));
17190   gcc_assert (GET_MODE (operands[1]) != TFmode);
17191 
17192   if (fisttp)
17193       output_asm_insn ("fisttp%Z0\t%0", operands);
17194   else
17195     {
17196       if (round_mode != I387_CW_ANY)
17197 	output_asm_insn ("fldcw\t%3", operands);
17198       if (stack_top_dies || dimode_p)
17199 	output_asm_insn ("fistp%Z0\t%0", operands);
17200       else
17201 	output_asm_insn ("fist%Z0\t%0", operands);
17202       if (round_mode != I387_CW_ANY)
17203 	output_asm_insn ("fldcw\t%2", operands);
17204     }
17205 
17206   return "";
17207 }
17208 
17209 /* Output code for x87 ffreep insn.  The OPNO argument, which may only
17210    have the values zero or one, indicates the ffreep insn's operand
17211    from the OPERANDS array.  */
17212 
17213 static const char *
17214 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
17215 {
17216   if (TARGET_USE_FFREEP)
17217 #ifdef HAVE_AS_IX86_FFREEP
17218     return opno ? "ffreep\t%y1" : "ffreep\t%y0";
17219 #else
17220     {
17221       static char retval[32];
17222       int regno = REGNO (operands[opno]);
17223 
17224       gcc_assert (STACK_REGNO_P (regno));
17225 
17226       regno -= FIRST_STACK_REG;
17227 
17228       snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
17229       return retval;
17230     }
17231 #endif
17232 
17233   return opno ? "fstp\t%y1" : "fstp\t%y0";
17234 }
17235 
17236 
17237 /* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
17238    should be used.  UNORDERED_P is true when fucom should be used.  */
17239 
17240 const char *
17241 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17242 {
17243   int stack_top_dies;
17244   rtx cmp_op0, cmp_op1;
17245   int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
17246 
17247   if (eflags_p)
17248     {
17249       cmp_op0 = operands[0];
17250       cmp_op1 = operands[1];
17251     }
17252   else
17253     {
17254       cmp_op0 = operands[1];
17255       cmp_op1 = operands[2];
17256     }
17257 
17258   if (is_sse)
17259     {
17260       if (GET_MODE (operands[0]) == SFmode)
17261 	if (unordered_p)
17262 	  return "%vucomiss\t{%1, %0|%0, %1}";
17263 	else
17264 	  return "%vcomiss\t{%1, %0|%0, %1}";
17265       else
17266 	if (unordered_p)
17267 	  return "%vucomisd\t{%1, %0|%0, %1}";
17268 	else
17269 	  return "%vcomisd\t{%1, %0|%0, %1}";
17270     }
17271 
17272   gcc_assert (STACK_TOP_P (cmp_op0));
17273 
17274   stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17275 
17276   if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17277     {
17278       if (stack_top_dies)
17279 	{
17280 	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17281 	  return output_387_ffreep (operands, 1);
17282 	}
17283       else
17284 	return "ftst\n\tfnstsw\t%0";
17285     }
17286 
17287   if (STACK_REG_P (cmp_op1)
17288       && stack_top_dies
17289       && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17290       && REGNO (cmp_op1) != FIRST_STACK_REG)
17291     {
17292       /* If both the top of the 387 stack and the other operand (itself
17293 	 a stack register) die, then this must be an `fcompp' float
17294 	 compare.  */
17295 
17296       if (eflags_p)
17297 	{
17298 	  /* There is no double popping fcomi variant.  Fortunately,
17299 	     eflags is immune from the fstp's cc clobbering.  */
17300 	  if (unordered_p)
17301 	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17302 	  else
17303 	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17304 	  return output_387_ffreep (operands, 0);
17305 	}
17306       else
17307 	{
17308 	  if (unordered_p)
17309 	    return "fucompp\n\tfnstsw\t%0";
17310 	  else
17311 	    return "fcompp\n\tfnstsw\t%0";
17312 	}
17313     }
17314   else
17315     {
17316       /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
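      /* For example, eflags_p << 3 | unordered_p << 1 | stack_top_dies
	 == 8 | 2 | 1 == 11 selects "fucomip" below.  */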
17317 
17318       static const char * const alt[16] =
17319       {
17320 	"fcom%Z2\t%y2\n\tfnstsw\t%0",
17321 	"fcomp%Z2\t%y2\n\tfnstsw\t%0",
17322 	"fucom%Z2\t%y2\n\tfnstsw\t%0",
17323 	"fucomp%Z2\t%y2\n\tfnstsw\t%0",
17324 
17325 	"ficom%Z2\t%y2\n\tfnstsw\t%0",
17326 	"ficomp%Z2\t%y2\n\tfnstsw\t%0",
17327 	NULL,
17328 	NULL,
17329 
17330 	"fcomi\t{%y1, %0|%0, %y1}",
17331 	"fcomip\t{%y1, %0|%0, %y1}",
17332 	"fucomi\t{%y1, %0|%0, %y1}",
17333 	"fucomip\t{%y1, %0|%0, %y1}",
17334 
17335 	NULL,
17336 	NULL,
17337 	NULL,
17338 	NULL
17339       };
17340 
17341       int mask;
17342       const char *ret;
17343 
17344       mask  = eflags_p << 3;
17345       mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17346       mask |= unordered_p << 1;
17347       mask |= stack_top_dies;
17348 
17349       gcc_assert (mask < 16);
17350       ret = alt[mask];
17351       gcc_assert (ret);
17352 
17353       return ret;
17354     }
17355 }
17356 
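/* Output one element of a jump-table address vector: the address of
   local label LPREFIX<VALUE>, as a .long, or as a .quad on LP64
   targets where ASM_QUAD is available.  */
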
17357 void
17358 ix86_output_addr_vec_elt (FILE *file, int value)
17359 {
17360   const char *directive = ASM_LONG;
17361 
17362 #ifdef ASM_QUAD
17363   if (TARGET_LP64)
17364     directive = ASM_QUAD;
17365 #else
17366   gcc_assert (!TARGET_64BIT);
17367 #endif
17368 
17369   fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17370 }
17371 
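/* Output one element of a PIC jump table as a label difference,
   e.g. ".long .L5-.L2" (label numbers illustrative; .quad for 64-bit
   DImode case vectors), or as an @GOTOFF or GOT-relative expression
   on targets where a plain label difference cannot be used.  */
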
17372 void
17373 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17374 {
17375   const char *directive = ASM_LONG;
17376 
17377 #ifdef ASM_QUAD
17378   if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17379     directive = ASM_QUAD;
17380 #else
17381   gcc_assert (!TARGET_64BIT);
17382 #endif
17383   /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
17384   if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17385     fprintf (file, "%s%s%d-%s%d\n",
17386 	     directive, LPREFIX, value, LPREFIX, rel);
17387   else if (HAVE_AS_GOTOFF_IN_DATA)
17388     fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17389 #if TARGET_MACHO
17390   else if (TARGET_MACHO)
17391     {
17392       fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17393       machopic_output_function_base_name (file);
17394       putc ('\n', file);
17395     }
17396 #endif
17397   else
17398     asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17399 		 GOT_SYMBOL_NAME, LPREFIX, value);
17400 }
17401 
17402 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17403    for the target.  */
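/* A rough sketch of the RTL emitted for the xor form, which is used
   when !TARGET_USE_MOV0 or when optimizing for size and requires a
   flags clobber (register names illustrative):

     (parallel [(set (reg:SI ax) (const_int 0))
		(clobber (reg:CC flags))])

   assembling to "xorl %eax, %eax"; the mov form is a plain SET.  */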
17404 
17405 void
17406 ix86_expand_clear (rtx dest)
17407 {
17408   rtx tmp;
17409 
17410   /* We play register width games, which are only valid after reload.  */
17411   gcc_assert (reload_completed);
17412 
17413   /* Avoid HImode and its attendant prefix byte.  */
17414   if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17415     dest = gen_rtx_REG (SImode, REGNO (dest));
17416   tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
17417 
17418   if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17419     {
17420       rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17421       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17422     }
17423 
17424   emit_insn (tmp);
17425 }
17426 
17427 /* X is an unchanging MEM.  If it is a constant pool reference, return
17428    the constant pool rtx, else NULL.  */
17429 
17430 rtx
17431 maybe_get_pool_constant (rtx x)
17432 {
17433   x = ix86_delegitimize_address (XEXP (x, 0));
17434 
17435   if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17436     return get_pool_constant (x);
17437 
17438   return NULL_RTX;
17439 }
17440 
17441 void
17442 ix86_expand_move (machine_mode mode, rtx operands[])
17443 {
17444   rtx op0, op1;
17445   enum tls_model model;
17446 
17447   op0 = operands[0];
17448   op1 = operands[1];
17449 
17450   if (GET_CODE (op1) == SYMBOL_REF)
17451     {
17452       rtx tmp;
17453 
17454       model = SYMBOL_REF_TLS_MODEL (op1);
17455       if (model)
17456 	{
17457 	  op1 = legitimize_tls_address (op1, model, true);
17458 	  op1 = force_operand (op1, op0);
17459 	  if (op1 == op0)
17460 	    return;
17461 	  op1 = convert_to_mode (mode, op1, 1);
17462 	}
17463       else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17464 	op1 = tmp;
17465     }
17466   else if (GET_CODE (op1) == CONST
17467 	   && GET_CODE (XEXP (op1, 0)) == PLUS
17468 	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17469     {
17470       rtx addend = XEXP (XEXP (op1, 0), 1);
17471       rtx symbol = XEXP (XEXP (op1, 0), 0);
17472       rtx tmp;
17473 
17474       model = SYMBOL_REF_TLS_MODEL (symbol);
17475       if (model)
17476 	tmp = legitimize_tls_address (symbol, model, true);
17477       else
17478         tmp = legitimize_pe_coff_symbol (symbol, true);
17479 
17480       if (tmp)
17481 	{
17482 	  tmp = force_operand (tmp, NULL);
17483 	  tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17484 				     op0, 1, OPTAB_DIRECT);
17485 	  if (tmp == op0)
17486 	    return;
17487 	  op1 = convert_to_mode (mode, tmp, 1);
17488 	}
17489     }
17490 
17491   if ((flag_pic || MACHOPIC_INDIRECT)
17492       && symbolic_operand (op1, mode))
17493     {
17494       if (TARGET_MACHO && !TARGET_64BIT)
17495 	{
17496 #if TARGET_MACHO
17497 	  /* dynamic-no-pic */
17498 	  if (MACHOPIC_INDIRECT)
17499 	    {
17500 	      rtx temp = ((reload_in_progress
17501 			   || ((op0 && REG_P (op0))
17502 			       && mode == Pmode))
17503 			  ? op0 : gen_reg_rtx (Pmode));
17504 	      op1 = machopic_indirect_data_reference (op1, temp);
17505 	      if (MACHOPIC_PURE)
17506 		op1 = machopic_legitimize_pic_address (op1, mode,
17507 						       temp == op1 ? 0 : temp);
17508 	    }
17509 	  if (op0 != op1 && GET_CODE (op0) != MEM)
17510 	    {
17511 	      rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
17512 	      emit_insn (insn);
17513 	      return;
17514 	    }
17515 	  if (GET_CODE (op0) == MEM)
17516 	    op1 = force_reg (Pmode, op1);
17517 	  else
17518 	    {
17519 	      rtx temp = op0;
17520 	      if (GET_CODE (temp) != REG)
17521 		temp = gen_reg_rtx (Pmode);
17522 	      temp = legitimize_pic_address (op1, temp);
17523 	      if (temp == op0)
17524 		return;
17525 	      op1 = temp;
17526 	    }
17527       /* dynamic-no-pic */
17528 #endif
17529 	}
17530       else
17531 	{
17532 	  if (MEM_P (op0))
17533 	    op1 = force_reg (mode, op1);
17534 	  else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17535 	    {
17536 	      rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17537 	      op1 = legitimize_pic_address (op1, reg);
17538 	      if (op0 == op1)
17539 		return;
17540 	      op1 = convert_to_mode (mode, op1, 1);
17541 	    }
17542 	}
17543     }
17544   else
17545     {
17546       if (MEM_P (op0)
17547 	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17548 	      || !push_operand (op0, mode))
17549 	  && MEM_P (op1))
17550 	op1 = force_reg (mode, op1);
17551 
17552       if (push_operand (op0, mode)
17553 	  && ! general_no_elim_operand (op1, mode))
17554 	op1 = copy_to_mode_reg (mode, op1);
17555 
17556       /* Force large constants in 64-bit compilation into a register
17557	 to get them CSEd.  */
17558       if (can_create_pseudo_p ()
17559 	  && (mode == DImode) && TARGET_64BIT
17560 	  && immediate_operand (op1, mode)
17561 	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
17562 	  && !register_operand (op0, mode)
17563 	  && optimize)
17564 	op1 = copy_to_mode_reg (mode, op1);
17565 
17566       if (can_create_pseudo_p ()
17567 	  && FLOAT_MODE_P (mode)
17568 	  && GET_CODE (op1) == CONST_DOUBLE)
17569 	{
17570 	  /* If we are loading a floating-point constant into a register,
17571 	     force the value to memory now, since we'll get better code
17572 	     out of the back end.  */
17573 
17574 	  op1 = validize_mem (force_const_mem (mode, op1));
17575 	  if (!register_operand (op0, mode))
17576 	    {
17577 	      rtx temp = gen_reg_rtx (mode);
17578 	      emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
17579 	      emit_move_insn (op0, temp);
17580 	      return;
17581 	    }
17582 	}
17583     }
17584 
17585   emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17586 }
17587 
17588 void
17589 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17590 {
17591   rtx op0 = operands[0], op1 = operands[1];
17592   unsigned int align = GET_MODE_ALIGNMENT (mode);
17593 
17594   if (push_operand (op0, VOIDmode))
17595     op0 = emit_move_resolve_push (mode, op0);
17596 
17597   /* Force constants other than zero into memory.  We do not know how
17598      the instructions used to build constants modify the upper 64 bits
17599      of the register; once we have that information we may be able
17600      to handle some of them more efficiently.  */
17601   if (can_create_pseudo_p ()
17602       && register_operand (op0, mode)
17603       && (CONSTANT_P (op1)
17604 	  || (GET_CODE (op1) == SUBREG
17605 	      && CONSTANT_P (SUBREG_REG (op1))))
17606       && !standard_sse_constant_p (op1))
17607     op1 = validize_mem (force_const_mem (mode, op1));
17608 
17609   /* We need to check memory alignment for SSE modes since attributes
17610      can make operands unaligned.  */
17611   if (can_create_pseudo_p ()
17612       && SSE_REG_MODE_P (mode)
17613       && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17614 	  || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17615     {
17616       rtx tmp[2];
17617 
17618       /* ix86_expand_vector_move_misalign() does not like constants ... */
17619       if (CONSTANT_P (op1)
17620 	  || (GET_CODE (op1) == SUBREG
17621 	      && CONSTANT_P (SUBREG_REG (op1))))
17622 	op1 = validize_mem (force_const_mem (mode, op1));
17623 
17624       /* ... nor both arguments in memory.  */
17625       if (!register_operand (op0, mode)
17626 	  && !register_operand (op1, mode))
17627 	op1 = force_reg (mode, op1);
17628 
17629       tmp[0] = op0; tmp[1] = op1;
17630       ix86_expand_vector_move_misalign (mode, tmp);
17631       return;
17632     }
17633 
17634   /* If neither operand is a register, force operand 1 into one.  */
17635   if (can_create_pseudo_p ()
17636       && !register_operand (op0, mode)
17637       && !register_operand (op1, mode))
17638     {
17639       emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17640       return;
17641     }
17642 
17643   emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17644 }
17645 
17646 /* Split 32-byte AVX unaligned load and store if needed.  */
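/* For example, with TARGET_AVX256_SPLIT_UNALIGNED_STORE a 256-bit store
   is emitted as two 128-bit halves via vextractf128 at offsets 0 and 16,
   and a split load is rebuilt from two 128-bit pieces with a VEC_CONCAT
   (i.e. vinsertf128).  */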
17647 
17648 static void
17649 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17650 {
17651   rtx m;
17652   rtx (*extract) (rtx, rtx, rtx);
17653   rtx (*load_unaligned) (rtx, rtx);
17654   rtx (*store_unaligned) (rtx, rtx);
17655   machine_mode mode;
17656 
17657   switch (GET_MODE (op0))
17658     {
17659     default:
17660       gcc_unreachable ();
17661     case V32QImode:
17662       extract = gen_avx_vextractf128v32qi;
17663       load_unaligned = gen_avx_loaddquv32qi;
17664       store_unaligned = gen_avx_storedquv32qi;
17665       mode = V16QImode;
17666       break;
17667     case V8SFmode:
17668       extract = gen_avx_vextractf128v8sf;
17669       load_unaligned = gen_avx_loadups256;
17670       store_unaligned = gen_avx_storeups256;
17671       mode = V4SFmode;
17672       break;
17673     case V4DFmode:
17674       extract = gen_avx_vextractf128v4df;
17675       load_unaligned = gen_avx_loadupd256;
17676       store_unaligned = gen_avx_storeupd256;
17677       mode = V2DFmode;
17678       break;
17679     }
17680 
17681   if (MEM_P (op1))
17682     {
17683       if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
17684 	  && optimize_insn_for_speed_p ())
17685 	{
17686 	  rtx r = gen_reg_rtx (mode);
17687 	  m = adjust_address (op1, mode, 0);
17688 	  emit_move_insn (r, m);
17689 	  m = adjust_address (op1, mode, 16);
17690 	  r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17691 	  emit_move_insn (op0, r);
17692 	}
17693       /* Normal *mov<mode>_internal pattern will handle
17694 	 unaligned loads just fine if misaligned_operand
17695 	 is true, and without the UNSPEC it can be combined
17696 	 with arithmetic instructions.  */
17697       else if (misaligned_operand (op1, GET_MODE (op1)))
17698 	emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17699       else
17700 	emit_insn (load_unaligned (op0, op1));
17701     }
17702   else if (MEM_P (op0))
17703     {
17704       if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
17705 	  && optimize_insn_for_speed_p ())
17706 	{
17707 	  m = adjust_address (op0, mode, 0);
17708 	  emit_insn (extract (m, op1, const0_rtx));
17709 	  m = adjust_address (op0, mode, 16);
17710 	  emit_insn (extract (m, op1, const1_rtx));
17711 	}
17712       else
17713 	emit_insn (store_unaligned (op0, op1));
17714     }
17715   else
17716     gcc_unreachable ();
17717 }
17718 
17719 /* Implement the movmisalign patterns for SSE.  Non-SSE modes go
17720    straight to ix86_expand_vector_move.  */
17721 /* Code generation for scalar reg-reg moves of single and double precision data:
17722      if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
17723        movaps reg, reg
17724      else
17725        movss reg, reg
17726      if (x86_sse_partial_reg_dependency == true)
17727        movapd reg, reg
17728      else
17729        movsd reg, reg
17730 
17731    Code generation for scalar loads of double precision data:
17732      if (x86_sse_split_regs == true)
17733        movlpd mem, reg      (gas syntax)
17734      else
17735        movsd mem, reg
17736 
17737    Code generation for unaligned packed loads of single precision data
17738    (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17739      if (x86_sse_unaligned_move_optimal)
17740        movups mem, reg
17741 
17742      if (x86_sse_partial_reg_dependency == true)
17743        {
17744          xorps  reg, reg
17745          movlps mem, reg
17746          movhps mem+8, reg
17747        }
17748      else
17749        {
17750          movlps mem, reg
17751          movhps mem+8, reg
17752        }
17753 
17754    Code generation for unaligned packed loads of double precision data
17755    (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17756      if (x86_sse_unaligned_move_optimal)
17757        movupd mem, reg
17758 
17759      if (x86_sse_split_regs == true)
17760        {
17761          movlpd mem, reg
17762          movhpd mem+8, reg
17763        }
17764      else
17765        {
17766          movsd  mem, reg
17767          movhpd mem+8, reg
17768        }
17769  */
17770 
17771 void
17772 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17773 {
17774   rtx op0, op1, orig_op0 = NULL_RTX, m;
17775   rtx (*load_unaligned) (rtx, rtx);
17776   rtx (*store_unaligned) (rtx, rtx);
17777 
17778   op0 = operands[0];
17779   op1 = operands[1];
17780 
17781   if (GET_MODE_SIZE (mode) == 64)
17782     {
17783       switch (GET_MODE_CLASS (mode))
17784 	{
17785 	case MODE_VECTOR_INT:
17786 	case MODE_INT:
17787 	  if (GET_MODE (op0) != V16SImode)
17788 	    {
17789 	      if (!MEM_P (op0))
17790 		{
17791 		  orig_op0 = op0;
17792 		  op0 = gen_reg_rtx (V16SImode);
17793 		}
17794 	      else
17795 		op0 = gen_lowpart (V16SImode, op0);
17796 	    }
17797 	  op1 = gen_lowpart (V16SImode, op1);
17798 	  /* FALLTHRU */
17799 
17800 	case MODE_VECTOR_FLOAT:
17801 	  switch (GET_MODE (op0))
17802 	    {
17803 	    default:
17804 	      gcc_unreachable ();
17805 	    case V16SImode:
17806 	      load_unaligned = gen_avx512f_loaddquv16si;
17807 	      store_unaligned = gen_avx512f_storedquv16si;
17808 	      break;
17809 	    case V16SFmode:
17810 	      load_unaligned = gen_avx512f_loadups512;
17811 	      store_unaligned = gen_avx512f_storeups512;
17812 	      break;
17813 	    case V8DFmode:
17814 	      load_unaligned = gen_avx512f_loadupd512;
17815 	      store_unaligned = gen_avx512f_storeupd512;
17816 	      break;
17817 	    }
17818 
17819 	  if (MEM_P (op1))
17820 	    emit_insn (load_unaligned (op0, op1));
17821 	  else if (MEM_P (op0))
17822 	    emit_insn (store_unaligned (op0, op1));
17823 	  else
17824 	    gcc_unreachable ();
17825 	  if (orig_op0)
17826 	    emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17827 	  break;
17828 
17829 	default:
17830 	  gcc_unreachable ();
17831 	}
17832 
17833       return;
17834     }
17835 
17836   if (TARGET_AVX
17837       && GET_MODE_SIZE (mode) == 32)
17838     {
17839       switch (GET_MODE_CLASS (mode))
17840 	{
17841 	case MODE_VECTOR_INT:
17842 	case MODE_INT:
17843 	  if (GET_MODE (op0) != V32QImode)
17844 	    {
17845 	      if (!MEM_P (op0))
17846 		{
17847 		  orig_op0 = op0;
17848 		  op0 = gen_reg_rtx (V32QImode);
17849 		}
17850 	      else
17851 		op0 = gen_lowpart (V32QImode, op0);
17852 	    }
17853 	  op1 = gen_lowpart (V32QImode, op1);
17854 	  /* FALLTHRU */
17855 
17856 	case MODE_VECTOR_FLOAT:
17857 	  ix86_avx256_split_vector_move_misalign (op0, op1);
17858 	  if (orig_op0)
17859 	    emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17860 	  break;
17861 
17862 	default:
17863 	  gcc_unreachable ();
17864 	}
17865 
17866       return;
17867     }
17868 
17869   if (MEM_P (op1))
17870     {
17871       /* Normal *mov<mode>_internal pattern will handle
17872 	 unaligned loads just fine if misaligned_operand
17873 	 is true, and without the UNSPEC it can be combined
17874 	 with arithmetic instructions.  */
17875       if (TARGET_AVX
17876 	  && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17877 	      || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17878 	  && misaligned_operand (op1, GET_MODE (op1)))
17879 	emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17880       /* ??? If we have typed data, then it would appear that using
17881 	 movdqu is the only way to get unaligned data loaded with
17882 	 integer type.  */
17883       else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17884 	{
17885 	  if (GET_MODE (op0) != V16QImode)
17886 	    {
17887 	      orig_op0 = op0;
17888 	      op0 = gen_reg_rtx (V16QImode);
17889 	    }
17890 	  op1 = gen_lowpart (V16QImode, op1);
17891 	  /* We will eventually emit movups based on insn attributes.  */
17892 	  emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17893 	  if (orig_op0)
17894 	    emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17895 	}
17896       else if (TARGET_SSE2 && mode == V2DFmode)
17897         {
17898           rtx zero;
17899 
17900 	  if (TARGET_AVX
17901 	      || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17902 	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17903 	      || optimize_insn_for_size_p ())
17904 	    {
17905 	      /* We will eventually emit movups based on insn attributes.  */
17906 	      emit_insn (gen_sse2_loadupd (op0, op1));
17907 	      return;
17908 	    }
17909 
17910 	  /* When SSE registers are split into halves, we can avoid
17911 	     writing to the top half twice.  */
17912 	  if (TARGET_SSE_SPLIT_REGS)
17913 	    {
17914 	      emit_clobber (op0);
17915 	      zero = op0;
17916 	    }
17917 	  else
17918 	    {
17919 	      /* ??? Not sure about the best option for the Intel chips.
17920 		 The following would seem to satisfy; the register is
17921 		 entirely cleared, breaking the dependency chain.  We
17922 		 then store to the upper half, with a dependency depth
17923 		 of one.  A rumor has it that Intel recommends two movsd
17924 		 followed by an unpacklpd, but this is unconfirmed.  And
17925 		 given that the dependency depth of the unpacklpd would
17926 		 still be one, I'm not sure why this would be better.  */
17927 	      zero = CONST0_RTX (V2DFmode);
17928 	    }
17929 
17930 	  m = adjust_address (op1, DFmode, 0);
17931 	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
17932 	  m = adjust_address (op1, DFmode, 8);
17933 	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
17934 	}
17935       else
17936         {
17937 	  rtx t;
17938 
17939 	  if (TARGET_AVX
17940 	      || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17941 	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17942 	      || optimize_insn_for_size_p ())
17943 	    {
17944 	      if (GET_MODE (op0) != V4SFmode)
17945 		{
17946 		  orig_op0 = op0;
17947 		  op0 = gen_reg_rtx (V4SFmode);
17948 		}
17949 	      op1 = gen_lowpart (V4SFmode, op1);
17950 	      emit_insn (gen_sse_loadups (op0, op1));
17951 	      if (orig_op0)
17952 		emit_move_insn (orig_op0,
17953 				gen_lowpart (GET_MODE (orig_op0), op0));
17954 	      return;
17955             }
17956 
17957 	  if (mode != V4SFmode)
17958 	    t = gen_reg_rtx (V4SFmode);
17959 	  else
17960 	    t = op0;
17961 
17962 	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17963 	    emit_move_insn (t, CONST0_RTX (V4SFmode));
17964 	  else
17965 	    emit_clobber (t);
17966 
17967 	  m = adjust_address (op1, V2SFmode, 0);
17968 	  emit_insn (gen_sse_loadlps (t, t, m));
17969 	  m = adjust_address (op1, V2SFmode, 8);
17970 	  emit_insn (gen_sse_loadhps (t, t, m));
17971 	  if (mode != V4SFmode)
17972 	    emit_move_insn (op0, gen_lowpart (mode, t));
17973 	}
17974     }
17975   else if (MEM_P (op0))
17976     {
17977       if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17978         {
17979 	  op0 = gen_lowpart (V16QImode, op0);
17980 	  op1 = gen_lowpart (V16QImode, op1);
17981 	  /* We will eventually emit movups based on insn attributes.  */
17982 	  emit_insn (gen_sse2_storedquv16qi (op0, op1));
17983 	}
17984       else if (TARGET_SSE2 && mode == V2DFmode)
17985 	{
17986 	  if (TARGET_AVX
17987 	      || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17988 	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17989 	      || optimize_insn_for_size_p ())
17990 	    /* We will eventually emit movups based on insn attributes.  */
17991 	    emit_insn (gen_sse2_storeupd (op0, op1));
17992 	  else
17993 	    {
17994 	      m = adjust_address (op0, DFmode, 0);
17995 	      emit_insn (gen_sse2_storelpd (m, op1));
17996 	      m = adjust_address (op0, DFmode, 8);
17997 	      emit_insn (gen_sse2_storehpd (m, op1));
17998 	    }
17999 	}
18000       else
18001 	{
18002 	  if (mode != V4SFmode)
18003 	    op1 = gen_lowpart (V4SFmode, op1);
18004 
18005 	  if (TARGET_AVX
18006 	      || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
18007 	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18008 	      || optimize_insn_for_size_p ())
18009 	    {
18010 	      op0 = gen_lowpart (V4SFmode, op0);
18011 	      emit_insn (gen_sse_storeups (op0, op1));
18012 	    }
18013 	  else
18014 	    {
18015 	      m = adjust_address (op0, V2SFmode, 0);
18016 	      emit_insn (gen_sse_storelps (m, op1));
18017 	      m = adjust_address (op0, V2SFmode, 8);
18018 	      emit_insn (gen_sse_storehps (m, op1));
18019 	    }
18020 	}
18021     }
18022   else
18023     gcc_unreachable ();
18024 }
18025 
18026 /* Helper function of ix86_fixup_binary_operands to canonicalize
18027    operand order.  Returns true if the operands should be swapped.  */
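/* E.g. "a = b + a" is swapped to "a = a + b" so the two-address form can
   reuse the destination; likewise an immediate or memory source is moved
   into the second slot.  */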
18028 
18029 static bool
18030 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
18031 			     rtx operands[])
18032 {
18033   rtx dst = operands[0];
18034   rtx src1 = operands[1];
18035   rtx src2 = operands[2];
18036 
18037   /* If the operation is not commutative, we can't do anything.  */
18038   if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
18039     return false;
18040 
18041   /* Highest priority is that src1 should match dst.  */
18042   if (rtx_equal_p (dst, src1))
18043     return false;
18044   if (rtx_equal_p (dst, src2))
18045     return true;
18046 
18047   /* Next highest priority is that immediate constants come second.  */
18048   if (immediate_operand (src2, mode))
18049     return false;
18050   if (immediate_operand (src1, mode))
18051     return true;
18052 
18053   /* Lowest priority is that memory references should come second.  */
18054   if (MEM_P (src2))
18055     return false;
18056   if (MEM_P (src1))
18057     return true;
18058 
18059   return false;
18060 }
18061 
18062 
18063 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
18064    destination to use for the operation.  If different from the true
18065    destination in operands[0], a copy operation will be required.  */
18066 
18067 rtx
18068 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
18069 			    rtx operands[])
18070 {
18071   rtx dst = operands[0];
18072   rtx src1 = operands[1];
18073   rtx src2 = operands[2];
18074 
18075   /* Canonicalize operand order.  */
18076   if (ix86_swap_binary_operands_p (code, mode, operands))
18077     {
18078       /* It is invalid to swap operands of different modes.  */
18079       gcc_assert (GET_MODE (src1) == GET_MODE (src2));
18080 
18081       std::swap (src1, src2);
18082     }
18083 
18084   /* Both source operands cannot be in memory.  */
18085   if (MEM_P (src1) && MEM_P (src2))
18086     {
18087       /* Optimization: Only read from memory once.  */
18088       if (rtx_equal_p (src1, src2))
18089 	{
18090 	  src2 = force_reg (mode, src2);
18091 	  src1 = src2;
18092 	}
18093       else if (rtx_equal_p (dst, src1))
18094 	src2 = force_reg (mode, src2);
18095       else
18096 	src1 = force_reg (mode, src1);
18097     }
18098 
18099   /* If the destination is memory, and we do not have matching source
18100      operands, do things in registers.  */
18101   if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18102     dst = gen_reg_rtx (mode);
18103 
18104   /* Source 1 cannot be a constant.  */
18105   if (CONSTANT_P (src1))
18106     src1 = force_reg (mode, src1);
18107 
18108   /* Source 1 cannot be a non-matching memory.  */
18109   if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18110     src1 = force_reg (mode, src1);
18111 
18112   /* Improve address combine.  */
18113   if (code == PLUS
18114       && GET_MODE_CLASS (mode) == MODE_INT
18115       && MEM_P (src2))
18116     src2 = force_reg (mode, src2);
18117 
18118   operands[1] = src1;
18119   operands[2] = src2;
18120   return dst;
18121 }
18122 
18123 /* Similarly, but assume that the destination has already been
18124    set up properly.  */
18125 
18126 void
18127 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
18128 				    machine_mode mode, rtx operands[])
18129 {
18130   rtx dst = ix86_fixup_binary_operands (code, mode, operands);
18131   gcc_assert (dst == operands[0]);
18132 }
18133 
18134 /* Attempt to expand a binary operator.  Make the expansion closer to the
18135    actual machine than just general_operand, which would allow 3 separate
18136    memory references (one output, two input) in a single insn.  */
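/* In the common case the emitted RTL pairs the operation with a flags
   clobber, e.g. for an SImode addition (pseudo register numbers are
   illustrative):

     (parallel [(set (reg:SI 60) (plus:SI (reg:SI 61) (reg:SI 62)))
		(clobber (reg:CC flags))])

   The clobber is omitted for PLUS during reload, and after reload when
   the insn will become an lea.  */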
18137 
18138 void
18139 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
18140 			     rtx operands[])
18141 {
18142   rtx src1, src2, dst, op, clob;
18143 
18144   dst = ix86_fixup_binary_operands (code, mode, operands);
18145   src1 = operands[1];
18146   src2 = operands[2];
18147 
18148   /* Emit the instruction.  */
18149 
18150   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
18151   if (reload_in_progress)
18152     {
18153       /* Reload doesn't know about the flags register, and doesn't know that
18154          it doesn't want to clobber it.  We can only do this with PLUS.  */
18155       gcc_assert (code == PLUS);
18156       emit_insn (op);
18157     }
18158   else if (reload_completed
18159 	   && code == PLUS
18160 	   && !rtx_equal_p (dst, src1))
18161     {
18162       /* This is going to be an LEA; avoid splitting it later.  */
18163       emit_insn (op);
18164     }
18165   else
18166     {
18167       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18168       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18169     }
18170 
18171   /* Fix up the destination if needed.  */
18172   if (dst != operands[0])
18173     emit_move_insn (operands[0], dst);
18174 }
18175 
18176 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
18177    the given OPERANDS.  */
18178 
18179 void
18180 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
18181 				     rtx operands[])
18182 {
18183   rtx op1 = NULL_RTX, op2 = NULL_RTX;
18184   if (GET_CODE (operands[1]) == SUBREG)
18185     {
18186       op1 = operands[1];
18187       op2 = operands[2];
18188     }
18189   else if (GET_CODE (operands[2]) == SUBREG)
18190     {
18191       op1 = operands[2];
18192       op2 = operands[1];
18193     }
18194   /* Optimize (__m128i) d | (__m128i) e and similar code
18195      when d and e are float vectors into a float vector logical
18196      insn.  In C/C++ without using intrinsics there is no other way
18197      to express a vector logical operation on float vectors than
18198      to cast them temporarily to integer vectors.  */
18199   if (op1
18200       && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18201       && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
18202       && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
18203       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
18204       && SUBREG_BYTE (op1) == 0
18205       && (GET_CODE (op2) == CONST_VECTOR
18206 	  || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
18207 	      && SUBREG_BYTE (op2) == 0))
18208       && can_create_pseudo_p ())
18209     {
18210       rtx dst;
18211       switch (GET_MODE (SUBREG_REG (op1)))
18212 	{
18213 	case V4SFmode:
18214 	case V8SFmode:
18215 	case V16SFmode:
18216 	case V2DFmode:
18217 	case V4DFmode:
18218 	case V8DFmode:
18219 	  dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
18220 	  if (GET_CODE (op2) == CONST_VECTOR)
18221 	    {
18222 	      op2 = gen_lowpart (GET_MODE (dst), op2);
18223 	      op2 = force_reg (GET_MODE (dst), op2);
18224 	    }
18225 	  else
18226 	    {
18227 	      op1 = operands[1];
18228 	      op2 = SUBREG_REG (operands[2]);
18229 	      if (!nonimmediate_operand (op2, GET_MODE (dst)))
18230 		op2 = force_reg (GET_MODE (dst), op2);
18231 	    }
18232 	  op1 = SUBREG_REG (op1);
18233 	  if (!nonimmediate_operand (op1, GET_MODE (dst)))
18234 	    op1 = force_reg (GET_MODE (dst), op1);
18235 	  emit_insn (gen_rtx_SET (VOIDmode, dst,
18236 				  gen_rtx_fmt_ee (code, GET_MODE (dst),
18237 						  op1, op2)));
18238 	  emit_move_insn (operands[0], gen_lowpart (mode, dst));
18239 	  return;
18240 	default:
18241 	  break;
18242 	}
18243     }
18244   if (!nonimmediate_operand (operands[1], mode))
18245     operands[1] = force_reg (mode, operands[1]);
18246   if (!nonimmediate_operand (operands[2], mode))
18247     operands[2] = force_reg (mode, operands[2]);
18248   ix86_fixup_binary_operands_no_copy (code, mode, operands);
18249   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18250 			  gen_rtx_fmt_ee (code, mode, operands[1],
18251 					  operands[2])));
18252 }
18253 
18254 /* Return TRUE or FALSE depending on whether the binary operator meets the
18255    appropriate constraints.  */
18256 
18257 bool
18258 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18259 			 rtx operands[3])
18260 {
18261   rtx dst = operands[0];
18262   rtx src1 = operands[1];
18263   rtx src2 = operands[2];
18264 
18265   /* Both source operands cannot be in memory.  */
18266   if (MEM_P (src1) && MEM_P (src2))
18267     return false;
18268 
18269   /* Canonicalize operand order for commutative operators.  */
18270   if (ix86_swap_binary_operands_p (code, mode, operands))
18271     std::swap (src1, src2);
18272 
18273   /* If the destination is memory, we must have a matching source operand.  */
18274   if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18275       return false;
18276 
18277   /* Source 1 cannot be a constant.  */
18278   if (CONSTANT_P (src1))
18279     return false;
18280 
18281   /* Source 1 cannot be a non-matching memory.  */
18282   if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18283     /* Support "andhi/andsi/anddi" as a zero-extending move.  */
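    /* (Constraint "L" accepts 0xff and 0xffff, so e.g. anding a memory
       operand with $0xffff into a register acts as a zero extension.)  */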
18284     return (code == AND
18285 	    && (mode == HImode
18286 		|| mode == SImode
18287 		|| (TARGET_64BIT && mode == DImode))
18288 	    && satisfies_constraint_L (src2));
18289 
18290   return true;
18291 }
18292 
18293 /* Attempt to expand a unary operator.  Make the expansion closer to the
18294    actual machine than just general_operand, which would allow 2 separate
18295    memory references (one output, one input) in a single insn.  */
18296 
18297 void
18298 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18299 			    rtx operands[])
18300 {
18301   bool matching_memory = false;
18302   rtx src, dst, op, clob;
18303 
18304   dst = operands[0];
18305   src = operands[1];
18306 
18307   /* If the destination is memory, and we do not have matching source
18308      operands, do things in registers.  */
18309   if (MEM_P (dst))
18310     {
18311       if (rtx_equal_p (dst, src))
18312 	matching_memory = true;
18313       else
18314 	dst = gen_reg_rtx (mode);
18315     }
18316 
18317   /* When source operand is memory, destination must match.  */
18318   if (MEM_P (src) && !matching_memory)
18319     src = force_reg (mode, src);
18320 
18321   /* Emit the instruction.  */
18322 
18323   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
18324   if (reload_in_progress || code == NOT)
18325     {
18326       /* Reload doesn't know about the flags register, and doesn't know that
18327          it doesn't want to clobber it.  */
18328       gcc_assert (code == NOT);
18329       emit_insn (op);
18330     }
18331   else
18332     {
18333       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18334       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18335     }
18336 
18337   /* Fix up the destination if needed.  */
18338   if (dst != operands[0])
18339     emit_move_insn (operands[0], dst);
18340 }
18341 
18342 /* Split 32-bit/64-bit divmod with 8-bit unsigned divmod if the dividend
18343    and divisor are within the range [0-255].  */
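/* A sketch of the emitted SImode sequence (register names illustrative):

       movl   dividend, scratch
       orl    divisor, scratch
       testl  $-256, scratch
       je     .Lqimode		# both operands fit in [0-255]
       <full 32-bit idiv/div>
       jmp    .Lend
   .Lqimode:
       <16-by-8-bit divb: AL = quotient, AH = remainder>
   .Lend:  */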
18344 
18345 void
18346 ix86_split_idivmod (machine_mode mode, rtx operands[],
18347 		    bool signed_p)
18348 {
18349   rtx_code_label *end_label, *qimode_label;
18350   rtx insn, div, mod;
18351   rtx scratch, tmp0, tmp1, tmp2;
18352   rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18353   rtx (*gen_zero_extend) (rtx, rtx);
18354   rtx (*gen_test_ccno_1) (rtx, rtx);
18355 
18356   switch (mode)
18357     {
18358     case SImode:
18359       gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18360       gen_test_ccno_1 = gen_testsi_ccno_1;
18361       gen_zero_extend = gen_zero_extendqisi2;
18362       break;
18363     case DImode:
18364       gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18365       gen_test_ccno_1 = gen_testdi_ccno_1;
18366       gen_zero_extend = gen_zero_extendqidi2;
18367       break;
18368     default:
18369       gcc_unreachable ();
18370     }
18371 
18372   end_label = gen_label_rtx ();
18373   qimode_label = gen_label_rtx ();
18374 
18375   scratch = gen_reg_rtx (mode);
18376 
18377   /* Use 8-bit unsigned divmod if dividend and divisor are within
18378      the range [0-255].  */
18379   emit_move_insn (scratch, operands[2]);
18380   scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18381 				 scratch, 1, OPTAB_DIRECT);
18382   emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18383   tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18384   tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18385   tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18386 			       gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18387 			       pc_rtx);
18388   insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
18389   predict_jump (REG_BR_PROB_BASE * 50 / 100);
18390   JUMP_LABEL (insn) = qimode_label;
18391 
18392   /* Generate the original signed/unsigned divmod.  */
18393   div = gen_divmod4_1 (operands[0], operands[1],
18394 		       operands[2], operands[3]);
18395   emit_insn (div);
18396 
18397   /* Branch to the end.  */
18398   emit_jump_insn (gen_jump (end_label));
18399   emit_barrier ();
18400 
18401   /* Generate 8bit unsigned divide.  */
18402   emit_label (qimode_label);
18403   /* Don't use operands[0] for the result of the 8-bit divide since
18404      not all registers support QImode ZERO_EXTRACT.  */
18405   tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18406   tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18407   tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18408   emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18409 
18410   if (signed_p)
18411     {
18412       div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18413       mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18414     }
18415   else
18416     {
18417       div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18418       mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18419     }
18420 
18421   /* Extract remainder from AH.  */
18422   tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18423   if (REG_P (operands[1]))
18424     insn = emit_move_insn (operands[1], tmp1);
18425   else
18426     {
18427       /* Need a new scratch register since the old one has the result
18428	 of the 8-bit divide.  */
18429       scratch = gen_reg_rtx (mode);
18430       emit_move_insn (scratch, tmp1);
18431       insn = emit_move_insn (operands[1], scratch);
18432     }
18433   set_unique_reg_note (insn, REG_EQUAL, mod);
18434 
18435   /* Zero extend quotient from AL.  */
18436   tmp1 = gen_lowpart (QImode, tmp0);
18437   insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18438   set_unique_reg_note (insn, REG_EQUAL, div);
18439 
18440   emit_label (end_label);
18441 }
18442 
18443 #define LEA_MAX_STALL (3)
18444 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
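/* LEA_MAX_STALL is measured in cycles, while the distances computed
   below are tracked in half-cycles, hence the search threshold of
   LEA_MAX_STALL << 1 and the final "distance >> 1" conversions.  */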
18445 
18446 /* Increase given DISTANCE in half-cycles according to
18447    dependencies between PREV and NEXT instructions.
18448    Add 1 half-cycle if there is no dependency and
18449    go to the next cycle if there is some dependency.  */
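/* E.g. a dependency found at DISTANCE 3 yields 3 + (3 & 1) + 2 = 6:
   round up to the next cycle boundary, then skip one full cycle
   (2 half-cycles); an independent pair just adds one half-cycle.  */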
18450 
18451 static unsigned int
18452 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18453 {
18454   df_ref def, use;
18455 
18456   if (!prev || !next)
18457     return distance + (distance & 1) + 2;
18458 
18459   if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18460     return distance + 1;
18461 
18462   FOR_EACH_INSN_USE (use, next)
18463     FOR_EACH_INSN_DEF (def, prev)
18464       if (!DF_REF_IS_ARTIFICIAL (def)
18465 	  && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18466 	return distance + (distance & 1) + 2;
18467 
18468   return distance + 1;
18469 }
18470 
18471 /* Check whether instruction INSN defines register number
18472    REGNO1 or REGNO2.  */
18473 
18474 static bool
18475 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18476 		  rtx insn)
18477 {
18478   df_ref def;
18479 
18480   FOR_EACH_INSN_DEF (def, insn)
18481     if (DF_REF_REG_DEF_P (def)
18482 	&& !DF_REF_IS_ARTIFICIAL (def)
18483 	&& (regno1 == DF_REF_REGNO (def)
18484 	    || regno2 == DF_REF_REGNO (def)))
18485       return true;
18486 
18487   return false;
18488 }
18489 
18490 /* Check whether instruction INSN uses register number
18491    REGNO as part of an address expression.  */
18492 
18493 static bool
18494 insn_uses_reg_mem (unsigned int regno, rtx insn)
18495 {
18496   df_ref use;
18497 
18498   FOR_EACH_INSN_USE (use, insn)
18499     if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18500       return true;
18501 
18502   return false;
18503 }
18504 
18505 /* Search backward for non-agu definition of register number REGNO1
18506    or register number REGNO2 in basic block starting from instruction
18507    START up to head of basic block or instruction INSN.
18508 
18509    Set *FOUND to true if a definition was found and to false
18510    otherwise.
18511 
18512    The distance in half-cycles between START and the found instruction,
18513    or the head of the BB, is added to DISTANCE and returned.  */
18514 
18515 static int
18516 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18517 			       rtx_insn *insn, int distance,
18518 			       rtx_insn *start, bool *found)
18519 {
18520   basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18521   rtx_insn *prev = start;
18522   rtx_insn *next = NULL;
18523 
18524   *found = false;
18525 
18526   while (prev
18527 	 && prev != insn
18528 	 && distance < LEA_SEARCH_THRESHOLD)
18529     {
18530       if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18531 	{
18532 	  distance = increase_distance (prev, next, distance);
18533 	  if (insn_defines_reg (regno1, regno2, prev))
18534 	    {
18535 	      if (recog_memoized (prev) < 0
18536 		  || get_attr_type (prev) != TYPE_LEA)
18537 		{
18538 		  *found = true;
18539 		  return distance;
18540 		}
18541 	    }
18542 
18543 	  next = prev;
18544 	}
18545       if (prev == BB_HEAD (bb))
18546 	break;
18547 
18548       prev = PREV_INSN (prev);
18549     }
18550 
18551   return distance;
18552 }
18553 
18554 /* Search backward for non-agu definition of register number REGNO1
18555    or register number REGNO2 in INSN's basic block until
18556    1. Pass LEA_SEARCH_THRESHOLD instructions, or
18557    2. Reach a neighbouring BB's boundary, or
18558    3. Reach an agu definition.
18559    Returns the distance between the non-agu definition point and INSN.
18560    If there is no definition point, returns -1.  */
18561 
18562 static int
18563 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18564 			 rtx_insn *insn)
18565 {
18566   basic_block bb = BLOCK_FOR_INSN (insn);
18567   int distance = 0;
18568   bool found = false;
18569 
18570   if (insn != BB_HEAD (bb))
18571     distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18572 					      distance, PREV_INSN (insn),
18573 					      &found);
18574 
18575   if (!found && distance < LEA_SEARCH_THRESHOLD)
18576     {
18577       edge e;
18578       edge_iterator ei;
18579       bool simple_loop = false;
18580 
18581       FOR_EACH_EDGE (e, ei, bb->preds)
18582 	if (e->src == bb)
18583 	  {
18584 	    simple_loop = true;
18585 	    break;
18586 	  }
18587 
18588       if (simple_loop)
18589 	distance = distance_non_agu_define_in_bb (regno1, regno2,
18590 						  insn, distance,
18591 						  BB_END (bb), &found);
18592       else
18593 	{
18594 	  int shortest_dist = -1;
18595 	  bool found_in_bb = false;
18596 
18597 	  FOR_EACH_EDGE (e, ei, bb->preds)
18598 	    {
18599 	      int bb_dist
18600 		= distance_non_agu_define_in_bb (regno1, regno2,
18601 						 insn, distance,
18602 						 BB_END (e->src),
18603 						 &found_in_bb);
18604 	      if (found_in_bb)
18605 		{
18606 		  if (shortest_dist < 0)
18607 		    shortest_dist = bb_dist;
18608 		  else if (bb_dist > 0)
18609 		    shortest_dist = MIN (bb_dist, shortest_dist);
18610 
18611 		  found = true;
18612 		}
18613 	    }
18614 
18615 	  distance = shortest_dist;
18616 	}
18617     }
18618 
18619   /* get_attr_type may modify recog data.  We want to make sure
18620      that recog data is valid for instruction INSN, on which
18621      distance_non_agu_define is called.  INSN is unchanged here.  */
18622   extract_insn_cached (insn);
18623 
18624   if (!found)
18625     return -1;
18626 
18627   return distance >> 1;
18628 }
18629 
18630 /* Return the distance in half-cycles between INSN and the next
18631    insn that uses register number REGNO in a memory address, added
18632    to DISTANCE.  Return -1 if REGNO is set.
18633 
18634    Set *FOUND to true if a use of the register was found and to
18635    false otherwise.
18636    Set *REDEFINED to true if the register was redefined and to
18637    false otherwise.  */
18638 
18639 static int
18640 distance_agu_use_in_bb (unsigned int regno,
18641 			rtx_insn *insn, int distance, rtx_insn *start,
18642 			bool *found, bool *redefined)
18643 {
18644   basic_block bb = NULL;
18645   rtx_insn *next = start;
18646   rtx_insn *prev = NULL;
18647 
18648   *found = false;
18649   *redefined = false;
18650 
18651   if (start != NULL_RTX)
18652     {
18653       bb = BLOCK_FOR_INSN (start);
18654       if (start != BB_HEAD (bb))
18655 	/* If insn and start belong to the same bb, set prev to insn,
18656 	   so the call to increase_distance will increase the distance
18657 	   between insns by 1.  */
18658 	prev = insn;
18659     }
18660 
18661   while (next
18662 	 && next != insn
18663 	 && distance < LEA_SEARCH_THRESHOLD)
18664     {
18665       if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18666 	{
18667 	  distance = increase_distance (prev, next, distance);
18668 	  if (insn_uses_reg_mem (regno, next))
18669 	    {
18670 	      /* Return DISTANCE if OP0 is used in memory
18671 		 address in NEXT.  */
18672 	      *found = true;
18673 	      return distance;
18674 	    }
18675 
18676 	  if (insn_defines_reg (regno, INVALID_REGNUM, next))
18677 	    {
18678 	      /* Return -1 if OP0 is set in NEXT.  */
18679 	      *redefined = true;
18680 	      return -1;
18681 	    }
18682 
18683 	  prev = next;
18684 	}
18685 
18686       if (next == BB_END (bb))
18687 	break;
18688 
18689       next = NEXT_INSN (next);
18690     }
18691 
18692   return distance;
18693 }
18694 
18695 /* Return the distance between INSN and the next insn that uses
18696    register number REGNO0 in a memory address.  Return -1 if no such
18697    use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
18698 
18699 static int
18700 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18701 {
18702   basic_block bb = BLOCK_FOR_INSN (insn);
18703   int distance = 0;
18704   bool found = false;
18705   bool redefined = false;
18706 
18707   if (insn != BB_END (bb))
18708     distance = distance_agu_use_in_bb (regno0, insn, distance,
18709 				       NEXT_INSN (insn),
18710 				       &found, &redefined);
18711 
18712   if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18713     {
18714       edge e;
18715       edge_iterator ei;
18716       bool simple_loop = false;
18717 
18718       FOR_EACH_EDGE (e, ei, bb->succs)
18719         if (e->dest == bb)
18720 	  {
18721 	    simple_loop = true;
18722 	    break;
18723 	  }
18724 
18725       if (simple_loop)
18726 	distance = distance_agu_use_in_bb (regno0, insn,
18727 					   distance, BB_HEAD (bb),
18728 					   &found, &redefined);
18729       else
18730 	{
18731 	  int shortest_dist = -1;
18732 	  bool found_in_bb = false;
18733 	  bool redefined_in_bb = false;
18734 
18735 	  FOR_EACH_EDGE (e, ei, bb->succs)
18736 	    {
18737 	      int bb_dist
18738 		= distance_agu_use_in_bb (regno0, insn,
18739 					  distance, BB_HEAD (e->dest),
18740 					  &found_in_bb, &redefined_in_bb);
18741 	      if (found_in_bb)
18742 		{
18743 		  if (shortest_dist < 0)
18744 		    shortest_dist = bb_dist;
18745 		  else if (bb_dist > 0)
18746 		    shortest_dist = MIN (bb_dist, shortest_dist);
18747 
18748 		  found = true;
18749 		}
18750 	    }
18751 
18752 	  distance = shortest_dist;
18753 	}
18754     }
18755 
18756   if (!found || redefined)
18757     return -1;
18758 
18759   return distance >> 1;
18760 }
18761 
18762 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
18763    there is a dilemma of choosing LEA or ADD.
18764    Negative value: ADD is preferred over LEA.
18765    Zero: Neutral.
18766    Positive value: LEA is preferred over ADD.  */
18767 #define IX86_LEA_PRIORITY 0
18768 
18769 /* Return true if use of the lea INSN has a performance advantage
18770    over a sequence of instructions.  The instruction sequence has
18771    SPLIT_COST cycles higher latency than the lea itself.  */
18772 
18773 static bool
18774 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18775 		      unsigned int regno2, int split_cost, bool has_scale)
18776 {
18777   int dist_define, dist_use;
18778 
18779   /* For Silvermont, if a 2-source or 3-source LEA is used for a
18780      non-destructive destination, or because we want the ability
18781      to use a scale, the use of LEA is justified.  */
18782   if (TARGET_SILVERMONT || TARGET_INTEL)
18783     {
18784       if (has_scale)
18785 	return true;
18786       if (split_cost < 1)
18787 	return false;
18788       if (regno0 == regno1 || regno0 == regno2)
18789 	return false;
18790       return true;
18791     }
18792 
18793   dist_define = distance_non_agu_define (regno1, regno2, insn);
18794   dist_use = distance_agu_use (regno0, insn);
18795 
18796   if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18797     {
18798       /* If there is no non-AGU operand definition, no AGU
18799	 operand use, and the split cost is 0, then both the lea
18800	 and non-lea variants have the same priority.  Currently
18801	 we prefer lea for 64-bit code and non-lea for 32-bit
18802	 code.  */
18803       if (dist_use < 0 && split_cost == 0)
18804 	return TARGET_64BIT || IX86_LEA_PRIORITY;
18805       else
18806 	return true;
18807     }
18808 
18809   /* With a longer definition distance, lea is preferable.
18810      Adjust the distance to take the splitting cost and
18811      lea priority into account.  */
18812   dist_define += split_cost + IX86_LEA_PRIORITY;
18813 
18814   /* If there is no use in a memory address then we just check
18815      that the split cost exceeds the AGU stall.  */
18816   if (dist_use < 0)
18817     return dist_define > LEA_MAX_STALL;
18818 
18819   /* If this insn has both backward non-agu dependence and forward
18820      agu dependence, the one with short distance takes effect.  */
18821   return dist_define >= dist_use;
18822 }
18823 
18824 /* Return true if it is legal to clobber the flags register at
18825    INSN, and false otherwise.  */
18826 
18827 static bool
18828 ix86_ok_to_clobber_flags (rtx_insn *insn)
18829 {
18830   basic_block bb = BLOCK_FOR_INSN (insn);
18831   df_ref use;
18832   bitmap live;
18833 
18834   while (insn)
18835     {
18836       if (NONDEBUG_INSN_P (insn))
18837 	{
18838 	  FOR_EACH_INSN_USE (use, insn)
18839 	    if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18840 	      return false;
18841 
18842 	  if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18843 	    return true;
18844 	}
18845 
18846       if (insn == BB_END (bb))
18847 	break;
18848 
18849       insn = NEXT_INSN (insn);
18850     }
18851 
18852   live = df_get_live_out (bb);
18853   return !REGNO_REG_SET_P (live, FLAGS_REG);
18854 }
18855 
18856 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18857    move and add to avoid AGU stalls.  */
18858 
18859 bool
18860 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18861 {
18862   unsigned int regno0, regno1, regno2;
18863 
18864   /* Check if we need to optimize.  */
18865   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18866     return false;
18867 
18868   /* Check that it is correct to split here.  */
18869   if (!ix86_ok_to_clobber_flags (insn))
18870     return false;
18871 
18872   regno0 = true_regnum (operands[0]);
18873   regno1 = true_regnum (operands[1]);
18874   regno2 = true_regnum (operands[2]);
18875 
18876   /* We need to split only adds with a non-destructive
18877      destination operand.  */
18878   if (regno0 == regno1 || regno0 == regno2)
18879     return false;
18880   else
18881     return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18882 }
18883 
18884 /* Return true if we should emit an lea instruction instead of a
18885    mov instruction.  */
18886 
18887 bool
18888 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18889 {
18890   unsigned int regno0, regno1;
18891 
18892   /* Check if we need to optimize.  */
18893   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18894     return false;
18895 
18896   /* Use lea for reg to reg moves only.  */
18897   if (!REG_P (operands[0]) || !REG_P (operands[1]))
18898     return false;
18899 
18900   regno0 = true_regnum (operands[0]);
18901   regno1 = true_regnum (operands[1]);
18902 
18903   return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18904 }
18905 
18906 /* Return true if we need to split an lea into a sequence of
18907    instructions to avoid AGU stalls.  */
18908 
18909 bool
18910 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18911 {
18912   unsigned int regno0, regno1, regno2;
18913   int split_cost;
18914   struct ix86_address parts;
18915   int ok;
18916 
18917   /* Check we need to optimize.  */
18918   if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18919     return false;
18920 
18921   /* The "at least two components" test below might not catch simple
18922      move or zero extension insns if parts.base is non-NULL and parts.disp
18923      is const0_rtx as the only components in the address, e.g. if the
18924      register is %rbp or %r13.  As this test is much cheaper and moves or
18925      zero extensions are the common case, do this check first.  */
18926   if (REG_P (operands[1])
18927       || (SImode_address_operand (operands[1], VOIDmode)
18928 	  && REG_P (XEXP (operands[1], 0))))
18929     return false;
18930 
18931   /* Check if it is OK to split here.  */
18932   if (!ix86_ok_to_clobber_flags (insn))
18933     return false;
18934 
18935   ok = ix86_decompose_address (operands[1], &parts);
18936   gcc_assert (ok);
18937 
18938   /* There should be at least two components in the address.  */
18939   if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18940       + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18941     return false;
18942 
18943   /* We should not split into an add if a non-legitimate PIC
18944      operand is used as the displacement.  */
18945   if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18946     return false;
18947 
18948   regno0 = true_regnum (operands[0]);
18949   regno1 = INVALID_REGNUM;
18950   regno2 = INVALID_REGNUM;
18951 
18952   if (parts.base)
18953     regno1 = true_regnum (parts.base);
18954   if (parts.index)
18955     regno2 = true_regnum (parts.index);
18956 
18957   split_cost = 0;
18958 
18959   /* Compute how many cycles we will add to execution time
18960      if we split the lea into a sequence of instructions.  */
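  /* For example, "lea 4(%rbx,%rcx,2), %rax" (registers illustrative)
     splits into roughly
	 mov %rcx, %rax; sal $1, %rax; add %rbx, %rax; add $4, %rax
     i.e. four ALU instructions instead of one lea, so split_cost works
     out to 1 (mov) + 1 (add base) + 1 (shift) + 1 (disp) - 1 (lea) = 3.  */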
18961   if (parts.base || parts.index)
18962     {
18963       /* Have to use a mov instruction if the non-destructive
18964	 destination form is used.  */
18965       if (regno1 != regno0 && regno2 != regno0)
18966 	split_cost += 1;
18967 
18968       /* Have to add index to base if both exist.  */
18969       if (parts.base && parts.index)
18970 	split_cost += 1;
18971 
18972       /* Have to use shift and adds if scale is 2 or greater.  */
18973       if (parts.scale > 1)
18974 	{
18975 	  if (regno0 != regno1)
18976 	    split_cost += 1;
18977 	  else if (regno2 == regno0)
18978 	    split_cost += 4;
18979 	  else
18980 	    split_cost += parts.scale;
18981 	}
18982 
18983       /* Have to use an add instruction with an immediate if
18984	 disp is non-zero.  */
18985       if (parts.disp && parts.disp != const0_rtx)
18986 	split_cost += 1;
18987 
18988       /* Subtract the price of lea.  */
18989       split_cost -= 1;
18990     }
18991 
18992   return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18993 				parts.scale > 1);
18994 }
18995 
18996 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18997    matches destination.  RTX includes clobber of FLAGS_REG.  */
18998 
18999 static void
19000 ix86_emit_binop (enum rtx_code code, machine_mode mode,
19001 		 rtx dst, rtx src)
19002 {
19003   rtx op, clob;
19004 
19005   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
19006   clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19007 
19008   emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
19009 }
19010 
19011 /* Return true if the definition of REGNO1 is nearest to INSN.  */
19012 
19013 static bool
19014 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
19015 {
19016   rtx_insn *prev = insn;
19017   rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
19018 
19019   if (insn == start)
19020     return false;
19021   while (prev && prev != start)
19022     {
19023       if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
19024 	{
19025 	  prev = PREV_INSN (prev);
19026 	  continue;
19027 	}
19028       if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
19029 	return true;
19030       else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
19031 	return false;
19032       prev = PREV_INSN (prev);
19033     }
19034 
19035   /* None of the regs is defined in the bb.  */
19036   return false;
19037 }
19038 
19039 /* Split an lea instruction into a sequence of instructions
19040    which are executed on the ALU to avoid AGU stalls.
19041    It is assumed that it is allowed to clobber the flags register
19042    at the lea position.  */
19043 
19044 void
19045 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
19046 {
19047   unsigned int regno0, regno1, regno2;
19048   struct ix86_address parts;
19049   rtx target, tmp;
19050   int ok, adds;
19051 
19052   ok = ix86_decompose_address (operands[1], &parts);
19053   gcc_assert (ok);
19054 
19055   target = gen_lowpart (mode, operands[0]);
19056 
19057   regno0 = true_regnum (target);
19058   regno1 = INVALID_REGNUM;
19059   regno2 = INVALID_REGNUM;
19060 
19061   if (parts.base)
19062     {
19063       parts.base = gen_lowpart (mode, parts.base);
19064       regno1 = true_regnum (parts.base);
19065     }
19066 
19067   if (parts.index)
19068     {
19069       parts.index = gen_lowpart (mode, parts.index);
19070       regno2 = true_regnum (parts.index);
19071     }
19072 
19073   if (parts.disp)
19074     parts.disp = gen_lowpart (mode, parts.disp);
19075 
19076   if (parts.scale > 1)
19077     {
19078       /* Case r1 = r1 + ...  */
19079       if (regno1 == regno0)
19080 	{
19081 	  /* If we have the case r1 = r1 + C * r2 then we
19082 	     would have to use multiplication, which is very
19083 	     expensive.  Assume the cost model is wrong if such
19084 	     a case reaches here.  */
19085 	  gcc_assert (regno2 != regno0);
19086 
19087 	  for (adds = parts.scale; adds > 0; adds--)
19088 	    ix86_emit_binop (PLUS, mode, target, parts.index);
19089 	}
19090       else
19091 	{
19092 	  /* r1 = r2 + r3 * C case.  Need to move r3 into r1.  */
19093 	  if (regno0 != regno2)
19094 	    emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
19095 
19096 	  /* Use shift for scaling.  */
19097 	  ix86_emit_binop (ASHIFT, mode, target,
19098 			   GEN_INT (exact_log2 (parts.scale)));
19099 
19100 	  if (parts.base)
19101 	    ix86_emit_binop (PLUS, mode, target, parts.base);
19102 
19103 	  if (parts.disp && parts.disp != const0_rtx)
19104 	    ix86_emit_binop (PLUS, mode, target, parts.disp);
19105 	}
19106     }
19107   else if (!parts.base && !parts.index)
19108     {
19109       gcc_assert (parts.disp);
19110       emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
19111     }
19112   else
19113     {
19114       if (!parts.base)
19115 	{
19116 	  if (regno0 != regno2)
19117 	    emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
19118 	}
19119       else if (!parts.index)
19120 	{
19121 	  if (regno0 != regno1)
19122 	    emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
19123 	}
19124       else
19125 	{
19126 	  if (regno0 == regno1)
19127 	    tmp = parts.index;
19128 	  else if (regno0 == regno2)
19129 	    tmp = parts.base;
19130 	  else
19131 	    {
19132 	      rtx tmp1;
19133 
19134 	      /* Find better operand for SET instruction, depending
19135 		 on which definition is farther from the insn.  */
19136 	      if (find_nearest_reg_def (insn, regno1, regno2))
19137 		tmp = parts.index, tmp1 = parts.base;
19138 	      else
19139 		tmp = parts.base, tmp1 = parts.index;
19140 
19141 	      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19142 
19143 	      if (parts.disp && parts.disp != const0_rtx)
19144 		ix86_emit_binop (PLUS, mode, target, parts.disp);
19145 
19146 	      ix86_emit_binop (PLUS, mode, target, tmp1);
19147 	      return;
19148 	    }
19149 
19150 	  ix86_emit_binop (PLUS, mode, target, tmp);
19151 	}
19152 
19153       if (parts.disp && parts.disp != const0_rtx)
19154 	ix86_emit_binop (PLUS, mode, target, parts.disp);
19155     }
19156 }
19157 
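/* Illustrative example of such a split (assuming the flags may be
   clobbered, as stated above):

     lea    0x4(%ebx,%ecx,4), %eax

   can be rewritten for the ALU as

     mov    %ecx, %eax        # copy index
     shl    $2, %eax          # scale 4 == 1 << 2
     add    %ebx, %eax        # add base
     add    $0x4, %eax        # add displacement

   following the parts.scale > 1, regno0 != regno1 path above.  */
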
19158 /* Return true if it is OK to optimize an ADD operation to an LEA
19159    operation to avoid flag register consumption.  For most processors,
19160    ADD is faster than LEA.  For processors like BONNELL, if the
19161    destination register of the LEA holds an actual address which will be
19162    used soon, LEA is better; otherwise ADD is better.  */
19163 
19164 bool
19165 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
19166 {
19167   unsigned int regno0 = true_regnum (operands[0]);
19168   unsigned int regno1 = true_regnum (operands[1]);
19169   unsigned int regno2 = true_regnum (operands[2]);
19170 
19171   /* If a = b + c with a != b and a != c, we must use the lea form.  */
19172   if (regno0 != regno1 && regno0 != regno2)
19173     return true;
19174 
19175   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
19176     return false;
19177 
19178   return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
19179 }
19180 
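/* E.g. (illustrative): for a = b + c with three distinct registers the
   two-operand add would need an extra mov, so "lea (%ebx,%ecx), %eax"
   must keep the lea form; for a = a + b, "add %esi, %eax" is used
   whenever ix86_lea_outperforms says LEA does not win on this CPU.  */
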
19181 /* Return true if destination reg of SET_BODY is shift count of
19182    USE_BODY.  */
19183 
19184 static bool
19185 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
19186 {
19187   rtx set_dest;
19188   rtx shift_rtx;
19189   int i;
19190 
19191   /* Retrieve destination of SET_BODY.  */
19192   switch (GET_CODE (set_body))
19193     {
19194     case SET:
19195       set_dest = SET_DEST (set_body);
19196       if (!set_dest || !REG_P (set_dest))
19197 	return false;
19198       break;
19199     case PARALLEL:
19200       for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
19201 	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
19202 					  use_body))
19203 	  return true;
19204       /* FALLTHRU */
19205     default:
19206       return false;
19207     }
19208 
19209   /* Retrieve shift count of USE_BODY.  */
19210   switch (GET_CODE (use_body))
19211     {
19212     case SET:
19213       shift_rtx = XEXP (use_body, 1);
19214       break;
19215     case PARALLEL:
19216       for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
19217 	if (ix86_dep_by_shift_count_body (set_body,
19218 					  XVECEXP (use_body, 0, i)))
19219 	  return true;
19220       /* FALLTHRU */
19221     default:
19222       return false;
19223     }
19224 
19225   if (shift_rtx
19226       && (GET_CODE (shift_rtx) == ASHIFT
19227 	  || GET_CODE (shift_rtx) == LSHIFTRT
19228 	  || GET_CODE (shift_rtx) == ASHIFTRT
19229 	  || GET_CODE (shift_rtx) == ROTATE
19230 	  || GET_CODE (shift_rtx) == ROTATERT))
19231     {
19232       rtx shift_count = XEXP (shift_rtx, 1);
19233 
19234       /* Return true if shift count is dest of SET_BODY.  */
19235       if (REG_P (shift_count))
19236 	{
19237 	  /* Add this check since the function can be invoked before
19238 	     register allocation by the pre-reload scheduler.  */
19239 	  if (reload_completed
19240 	      && true_regnum (set_dest) == true_regnum (shift_count))
19241 	    return true;
19242 	  else if (REGNO (set_dest) == REGNO (shift_count))
19243 	    return true;
19244 	}
19245     }
19246 
19247   return false;
19248 }
19249 
19250 /* Return true if destination reg of SET_INSN is shift count of
19251    USE_INSN.  */
19252 
19253 bool
19254 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19255 {
19256   return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19257 				       PATTERN (use_insn));
19258 }
19259 
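/* Illustrative example: if SET_INSN sets %ecx and USE_INSN is
   "sall %cl, %eax", then the destination of the former is the shift
   count of the latter and this returns true.  */
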
19260 /* Return TRUE or FALSE depending on whether the unary operator meets the
19261    appropriate constraints.  */
19262 
19263 bool
19264 ix86_unary_operator_ok (enum rtx_code,
19265 			machine_mode,
19266 			rtx operands[2])
19267 {
19268   /* If one of operands is memory, source and destination must match.  */
19269   if ((MEM_P (operands[0])
19270        || MEM_P (operands[1]))
19271       && ! rtx_equal_p (operands[0], operands[1]))
19272     return false;
19273   return true;
19274 }
19275 
19276 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19277    are ok, keeping in mind the possible movddup alternative.  */
19278 
19279 bool
19280 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19281 {
19282   if (MEM_P (operands[0]))
19283     return rtx_equal_p (operands[0], operands[1 + high]);
19284   if (MEM_P (operands[1]) && MEM_P (operands[2]))
19285     return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19286   return true;
19287 }
19288 
19289 /* Post-reload splitter for converting an SFmode or DFmode value in an
19290    SSE register into an unsigned SImode value.  */
19291 
19292 void
19293 ix86_split_convert_uns_si_sse (rtx operands[])
19294 {
19295   machine_mode vecmode;
19296   rtx value, large, zero_or_two31, input, two31, x;
19297 
19298   large = operands[1];
19299   zero_or_two31 = operands[2];
19300   input = operands[3];
19301   two31 = operands[4];
19302   vecmode = GET_MODE (large);
19303   value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19304 
19305   /* Load up the value into the low element.  We must ensure that the other
19306      elements are valid floats -- zero is the easiest such value.  */
19307   if (MEM_P (input))
19308     {
19309       if (vecmode == V4SFmode)
19310 	emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19311       else
19312 	emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19313     }
19314   else
19315     {
19316       input = gen_rtx_REG (vecmode, REGNO (input));
19317       emit_move_insn (value, CONST0_RTX (vecmode));
19318       if (vecmode == V4SFmode)
19319 	emit_insn (gen_sse_movss (value, value, input));
19320       else
19321 	emit_insn (gen_sse2_movsd (value, value, input));
19322     }
19323 
19324   emit_move_insn (large, two31);
19325   emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19326 
19327   x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19328   emit_insn (gen_rtx_SET (VOIDmode, large, x));
19329 
19330   x = gen_rtx_AND (vecmode, zero_or_two31, large);
19331   emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
19332 
19333   x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19334   emit_insn (gen_rtx_SET (VOIDmode, value, x));
19335 
19336   large = gen_rtx_REG (V4SImode, REGNO (large));
19337   emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19338 
19339   x = gen_rtx_REG (V4SImode, REGNO (value));
19340   if (vecmode == V4SFmode)
19341     emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19342   else
19343     emit_insn (gen_sse2_cvttpd2dq (x, value));
19344   value = x;
19345 
19346   emit_insn (gen_xorv4si3 (value, value, large));
19347 }
19348 
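/* Worked example (illustrative): converting 3000000000.0.  The input
   compares >= 0x1.0p31, so zero_or_two31 becomes 0x1.0p31 and the
   subtraction leaves 852516352.0; the signed truncation then yields
   0x32d05e00, and xoring with the shifted mask 0x80000000 restores
   0xb2d05e00 == 3000000000.  Inputs below 0x1.0p31 see an all-zero
   mask, so nothing is subtracted or xored.  */
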
19349 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19350    Expects the 64-bit DImode to be supplied in a pair of integral
19351    registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
19352    -mfpmath=sse, !optimize_size only.  */
19353 
19354 void
19355 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19356 {
19357   REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19358   rtx int_xmm, fp_xmm;
19359   rtx biases, exponents;
19360   rtx x;
19361 
19362   int_xmm = gen_reg_rtx (V4SImode);
19363   if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19364     emit_insn (gen_movdi_to_sse (int_xmm, input));
19365   else if (TARGET_SSE_SPLIT_REGS)
19366     {
19367       emit_clobber (int_xmm);
19368       emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19369     }
19370   else
19371     {
19372       x = gen_reg_rtx (V2DImode);
19373       ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19374       emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19375     }
19376 
19377   x = gen_rtx_CONST_VECTOR (V4SImode,
19378 			    gen_rtvec (4, GEN_INT (0x43300000UL),
19379 				       GEN_INT (0x45300000UL),
19380 				       const0_rtx, const0_rtx));
19381   exponents = validize_mem (force_const_mem (V4SImode, x));
19382 
19383   /* int_xmm = { 0x45300000UL, fp_xmm/hi, 0x43300000UL, fp_xmm/lo }  */
19384   emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19385 
19386   /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19387      yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19388      Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19389      (0x1.0p84 + double(fp_value_hi_xmm)).
19390      Note these exponents differ by 32.  */
19391 
19392   fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19393 
19394   /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19395      in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
19396   real_ldexp (&bias_lo_rvt, &dconst1, 52);
19397   real_ldexp (&bias_hi_rvt, &dconst1, 84);
19398   biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19399   x = const_double_from_real_value (bias_hi_rvt, DFmode);
19400   biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19401   biases = validize_mem (force_const_mem (V2DFmode, biases));
19402   emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19403 
19404   /* Add the upper and lower DFmode values together.  */
19405   if (TARGET_SSE3)
19406     emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19407   else
19408     {
19409       x = copy_to_mode_reg (V2DFmode, fp_xmm);
19410       emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19411       emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19412     }
19413 
19414   ix86_expand_vector_extract (false, target, fp_xmm, 0);
19415 }
19416 
19417 /* Not used, but eases macroization of patterns.  */
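/* Worked example (illustrative): input = 2**33 + 5, i.e. hi = 2 and
   lo = 5.  Pasting the exponents yields the doubles
   0x4330000000000005 == 0x1.0p52 + 5 and
   0x4530000000000002 == 0x1.0p84 + 2 * 2**32;
   subtracting the biases leaves 5.0 and 0x1.0p33, and the final
   horizontal add produces 2**33 + 5 exactly.  */
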
19418 void
19419 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19420 {
19421   gcc_unreachable ();
19422 }
19423 
19424 /* Convert an unsigned SImode value into a DFmode.  Only currently used
19425    for SSE, but applicable anywhere.  */
19426 
19427 void
19428 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19429 {
19430   REAL_VALUE_TYPE TWO31r;
19431   rtx x, fp;
19432 
19433   x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19434 			   NULL, 1, OPTAB_DIRECT);
19435 
19436   fp = gen_reg_rtx (DFmode);
19437   emit_insn (gen_floatsidf2 (fp, x));
19438 
19439   real_ldexp (&TWO31r, &dconst1, 31);
19440   x = const_double_from_real_value (TWO31r, DFmode);
19441 
19442   x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19443   if (x != target)
19444     emit_move_insn (target, x);
19445 }
19446 
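/* Worked example (illustrative): input = 0xffffffff.  Adding
   -2147483648 wraps to 0x7fffffff, which floatsidf converts exactly to
   2147483647.0; adding 0x1.0p31 back yields 4294967295.0.  Small
   inputs work the same way: 5 wraps to -2147483643, and
   -2147483643.0 + 0x1.0p31 == 5.0.  */
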
19447 /* Convert a signed DImode value into a DFmode.  Only used for SSE in
19448    32-bit mode; otherwise we have a direct convert instruction.  */
19449 
19450 void
19451 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19452 {
19453   REAL_VALUE_TYPE TWO32r;
19454   rtx fp_lo, fp_hi, x;
19455 
19456   fp_lo = gen_reg_rtx (DFmode);
19457   fp_hi = gen_reg_rtx (DFmode);
19458 
19459   emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19460 
19461   real_ldexp (&TWO32r, &dconst1, 32);
19462   x = const_double_from_real_value (TWO32r, DFmode);
19463   fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19464 
19465   ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19466 
19467   x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19468 			   0, OPTAB_DIRECT);
19469   if (x != target)
19470     emit_move_insn (target, x);
19471 }
19472 
19473 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19474    For x86_32, -mfpmath=sse, !optimize_size only.  */
19475 void
19476 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19477 {
19478   REAL_VALUE_TYPE ONE16r;
19479   rtx fp_hi, fp_lo, int_hi, int_lo, x;
19480 
19481   real_ldexp (&ONE16r, &dconst1, 16);
19482   x = const_double_from_real_value (ONE16r, SFmode);
19483   int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
19484 				      NULL, 0, OPTAB_DIRECT);
19485   int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
19486 				      NULL, 0, OPTAB_DIRECT);
19487   fp_hi = gen_reg_rtx (SFmode);
19488   fp_lo = gen_reg_rtx (SFmode);
19489   emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19490   emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19491   fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19492 			       0, OPTAB_DIRECT);
19493   fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19494 			       0, OPTAB_DIRECT);
19495   if (!rtx_equal_p (target, fp_hi))
19496     emit_move_insn (target, fp_hi);
19497 }
19498 
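/* Illustrative example: input = 0xdeadbeef splits into int_hi = 0xdead
   and int_lo = 0xbeef, both exactly representable in SFmode since they
   fit in 16 bits; the result is 0xdead * 0x1.0p16 + 0xbeef, where only
   the final addition can round.  */
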
19499 /* floatunsv{4,8}siv{4,8}sf2 expander.  Expand code to convert
19500    a vector of unsigned ints VAL to vector of floats TARGET.  */
19501 
19502 void
19503 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19504 {
19505   rtx tmp[8];
19506   REAL_VALUE_TYPE TWO16r;
19507   machine_mode intmode = GET_MODE (val);
19508   machine_mode fltmode = GET_MODE (target);
19509   rtx (*cvt) (rtx, rtx);
19510 
19511   if (intmode == V4SImode)
19512     cvt = gen_floatv4siv4sf2;
19513   else
19514     cvt = gen_floatv8siv8sf2;
19515   tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19516   tmp[0] = force_reg (intmode, tmp[0]);
19517   tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19518 				OPTAB_DIRECT);
19519   tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19520 				NULL_RTX, 1, OPTAB_DIRECT);
19521   tmp[3] = gen_reg_rtx (fltmode);
19522   emit_insn (cvt (tmp[3], tmp[1]));
19523   tmp[4] = gen_reg_rtx (fltmode);
19524   emit_insn (cvt (tmp[4], tmp[2]));
19525   real_ldexp (&TWO16r, &dconst1, 16);
19526   tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19527   tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19528   tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19529 				OPTAB_DIRECT);
19530   tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19531 				OPTAB_DIRECT);
19532   if (tmp[7] != target)
19533     emit_move_insn (target, tmp[7]);
19534 }
19535 
19536 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19537    pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19538    This is done by doing just a signed conversion if the value is < 0x1p31, and
19539    otherwise by subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards.  */
19540 
19541 rtx
19542 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19543 {
19544   REAL_VALUE_TYPE TWO31r;
19545   rtx two31r, tmp[4];
19546   machine_mode mode = GET_MODE (val);
19547   machine_mode scalarmode = GET_MODE_INNER (mode);
19548   machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19549   rtx (*cmp) (rtx, rtx, rtx, rtx);
19550   int i;
19551 
19552   for (i = 0; i < 3; i++)
19553     tmp[i] = gen_reg_rtx (mode);
19554   real_ldexp (&TWO31r, &dconst1, 31);
19555   two31r = const_double_from_real_value (TWO31r, scalarmode);
19556   two31r = ix86_build_const_vector (mode, 1, two31r);
19557   two31r = force_reg (mode, two31r);
19558   switch (mode)
19559     {
19560     case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19561     case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19562     case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19563     case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19564     default: gcc_unreachable ();
19565     }
19566   tmp[3] = gen_rtx_LE (mode, two31r, val);
19567   emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19568   tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19569 				0, OPTAB_DIRECT);
19570   if (intmode == V4SImode || TARGET_AVX2)
19571     *xorp = expand_simple_binop (intmode, ASHIFT,
19572 				 gen_lowpart (intmode, tmp[0]),
19573 				 GEN_INT (31), NULL_RTX, 0,
19574 				 OPTAB_DIRECT);
19575   else
19576     {
19577       rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
19578       two31 = ix86_build_const_vector (intmode, 1, two31);
19579       *xorp = expand_simple_binop (intmode, AND,
19580 				   gen_lowpart (intmode, tmp[0]),
19581 				   two31, NULL_RTX, 0,
19582 				   OPTAB_DIRECT);
19583     }
19584   return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19585 			      0, OPTAB_DIRECT);
19586 }
19587 
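/* Illustrative per-lane sketch: a lane holding 3e9 compares
   >= 0x1.0p31, so 0x1.0p31 is subtracted before the signed fix and the
   corresponding *XORP lane becomes 0x80000000 (the mask shifted left by
   31), flipping the sign bit back afterwards; a lane holding 3.0 gets a
   zero mask and passes through unchanged.  */
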
19588 /* A subroutine of ix86_build_signbit_mask.  If VECT is true,
19589    then replicate the value for all elements of the vector
19590    register.  */
19591 
19592 rtx
19593 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19594 {
19595   int i, n_elt;
19596   rtvec v;
19597   machine_mode scalar_mode;
19598 
19599   switch (mode)
19600     {
19601     case V64QImode:
19602     case V32QImode:
19603     case V16QImode:
19604     case V32HImode:
19605     case V16HImode:
19606     case V8HImode:
19607     case V16SImode:
19608     case V8SImode:
19609     case V4SImode:
19610     case V8DImode:
19611     case V4DImode:
19612     case V2DImode:
19613       gcc_assert (vect);
19614     case V16SFmode:
19615     case V8SFmode:
19616     case V4SFmode:
19617     case V8DFmode:
19618     case V4DFmode:
19619     case V2DFmode:
19620       n_elt = GET_MODE_NUNITS (mode);
19621       v = rtvec_alloc (n_elt);
19622       scalar_mode = GET_MODE_INNER (mode);
19623 
19624       RTVEC_ELT (v, 0) = value;
19625 
19626       for (i = 1; i < n_elt; ++i)
19627 	RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19628 
19629       return gen_rtx_CONST_VECTOR (mode, v);
19630 
19631     default:
19632       gcc_unreachable ();
19633     }
19634 }
19635 
19636 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19637    and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
19638    for an SSE register.  If VECT is true, then replicate the mask for
19639    all elements of the vector register.  If INVERT is true, then create
19640    a mask excluding the sign bit.  */
19641 
19642 rtx
19643 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19644 {
19645   machine_mode vec_mode, imode;
19646   HOST_WIDE_INT hi, lo;
19647   int shift = 63;
19648   rtx v;
19649   rtx mask;
19650 
19651   /* Find the sign bit, sign extended to 2*HWI.  */
19652   switch (mode)
19653     {
19654     case V16SImode:
19655     case V16SFmode:
19656     case V8SImode:
19657     case V4SImode:
19658     case V8SFmode:
19659     case V4SFmode:
19660       vec_mode = mode;
19661       mode = GET_MODE_INNER (mode);
19662       imode = SImode;
19663       lo = 0x80000000, hi = lo < 0;
19664       break;
19665 
19666     case V8DImode:
19667     case V4DImode:
19668     case V2DImode:
19669     case V8DFmode:
19670     case V4DFmode:
19671     case V2DFmode:
19672       vec_mode = mode;
19673       mode = GET_MODE_INNER (mode);
19674       imode = DImode;
19675       if (HOST_BITS_PER_WIDE_INT >= 64)
19676 	lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19677       else
19678 	lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19679       break;
19680 
19681     case TImode:
19682     case TFmode:
19683       vec_mode = VOIDmode;
19684       if (HOST_BITS_PER_WIDE_INT >= 64)
19685 	{
19686 	  imode = TImode;
19687 	  lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19688 	}
19689       else
19690 	{
19691 	  rtvec vec;
19692 
19693 	  imode = DImode;
19694 	  lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19695 
19696 	  if (invert)
19697 	    {
19698 	      lo = ~lo, hi = ~hi;
19699 	      v = constm1_rtx;
19700 	    }
19701 	  else
19702 	    v = const0_rtx;
19703 
19704 	  mask = immed_double_const (lo, hi, imode);
19705 
19706 	  vec = gen_rtvec (2, v, mask);
19707 	  v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19708 	  v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19709 
19710 	  return v;
19711 	}
19712       break;
19713 
19714     default:
19715       gcc_unreachable ();
19716     }
19717 
19718   if (invert)
19719     lo = ~lo, hi = ~hi;
19720 
19721   /* Force this value into the low part of a fp vector constant.  */
19722   mask = immed_double_const (lo, hi, imode);
19723   mask = gen_lowpart (mode, mask);
19724 
19725   if (vec_mode == VOIDmode)
19726     return force_reg (mode, mask);
19727 
19728   v = ix86_build_const_vector (vec_mode, vect, mask);
19729   return force_reg (vec_mode, v);
19730 }
19731 
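/* E.g. (illustrative): ix86_build_signbit_mask (V4SFmode, true, false)
   yields { 0x80000000, 0x80000000, 0x80000000, 0x80000000 } viewed as
   V4SF, while passing invert = true produces the complement
   { 0x7fffffff, ... } used to clear the sign bits.  */
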
19732 /* Generate code for floating point ABS or NEG.  */
19733 
19734 void
19735 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19736 				rtx operands[])
19737 {
19738   rtx mask, set, dst, src;
19739   bool use_sse = false;
19740   bool vector_mode = VECTOR_MODE_P (mode);
19741   machine_mode vmode = mode;
19742 
19743   if (vector_mode)
19744     use_sse = true;
19745   else if (mode == TFmode)
19746     use_sse = true;
19747   else if (TARGET_SSE_MATH)
19748     {
19749       use_sse = SSE_FLOAT_MODE_P (mode);
19750       if (mode == SFmode)
19751 	vmode = V4SFmode;
19752       else if (mode == DFmode)
19753 	vmode = V2DFmode;
19754     }
19755 
19756   /* NEG and ABS performed with SSE use bitwise mask operations.
19757      Create the appropriate mask now.  */
19758   if (use_sse)
19759     mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19760   else
19761     mask = NULL_RTX;
19762 
19763   dst = operands[0];
19764   src = operands[1];
19765 
19766   set = gen_rtx_fmt_e (code, mode, src);
19767   set = gen_rtx_SET (VOIDmode, dst, set);
19768 
19769   if (mask)
19770     {
19771       rtx use, clob;
19772       rtvec par;
19773 
19774       use = gen_rtx_USE (VOIDmode, mask);
19775       if (vector_mode)
19776 	par = gen_rtvec (2, set, use);
19777       else
19778 	{
19779           clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19780 	  par = gen_rtvec (3, set, use, clob);
19781         }
19782       emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19783     }
19784   else
19785     emit_insn (set);
19786 }
19787 
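/* With the masks above these become single bitwise instructions
   (illustrative sketch): NEG is "xorps <signbit mask>, %xmm0" and ABS
   is "andps <inverted mask>, %xmm0", which is why the mask is inverted
   exactly when CODE == ABS.  */
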
19788 /* Expand a copysign operation.  Special case operand 0 being a constant.  */
19789 
19790 void
19791 ix86_expand_copysign (rtx operands[])
19792 {
19793   machine_mode mode, vmode;
19794   rtx dest, op0, op1, mask, nmask;
19795 
19796   dest = operands[0];
19797   op0 = operands[1];
19798   op1 = operands[2];
19799 
19800   mode = GET_MODE (dest);
19801 
19802   if (mode == SFmode)
19803     vmode = V4SFmode;
19804   else if (mode == DFmode)
19805     vmode = V2DFmode;
19806   else
19807     vmode = mode;
19808 
19809   if (GET_CODE (op0) == CONST_DOUBLE)
19810     {
19811       rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19812 
19813       if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19814 	op0 = simplify_unary_operation (ABS, mode, op0, mode);
19815 
19816       if (mode == SFmode || mode == DFmode)
19817 	{
19818 	  if (op0 == CONST0_RTX (mode))
19819 	    op0 = CONST0_RTX (vmode);
19820 	  else
19821 	    {
19822 	      rtx v = ix86_build_const_vector (vmode, false, op0);
19823 
19824 	      op0 = force_reg (vmode, v);
19825 	    }
19826 	}
19827       else if (op0 != CONST0_RTX (mode))
19828 	op0 = force_reg (mode, op0);
19829 
19830       mask = ix86_build_signbit_mask (vmode, 0, 0);
19831 
19832       if (mode == SFmode)
19833 	copysign_insn = gen_copysignsf3_const;
19834       else if (mode == DFmode)
19835 	copysign_insn = gen_copysigndf3_const;
19836       else
19837 	copysign_insn = gen_copysigntf3_const;
19838 
19839 	emit_insn (copysign_insn (dest, op0, op1, mask));
19840     }
19841   else
19842     {
19843       rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19844 
19845       nmask = ix86_build_signbit_mask (vmode, 0, 1);
19846       mask = ix86_build_signbit_mask (vmode, 0, 0);
19847 
19848       if (mode == SFmode)
19849 	copysign_insn = gen_copysignsf3_var;
19850       else if (mode == DFmode)
19851 	copysign_insn = gen_copysigndf3_var;
19852       else
19853 	copysign_insn = gen_copysigntf3_var;
19854 
19855       emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19856     }
19857 }
19858 
19859 /* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
19860    be a constant, and so has already been expanded into a vector constant.  */
19861 
19862 void
19863 ix86_split_copysign_const (rtx operands[])
19864 {
19865   machine_mode mode, vmode;
19866   rtx dest, op0, mask, x;
19867 
19868   dest = operands[0];
19869   op0 = operands[1];
19870   mask = operands[3];
19871 
19872   mode = GET_MODE (dest);
19873   vmode = GET_MODE (mask);
19874 
19875   dest = simplify_gen_subreg (vmode, dest, mode, 0);
19876   x = gen_rtx_AND (vmode, dest, mask);
19877   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19878 
19879   if (op0 != CONST0_RTX (vmode))
19880     {
19881       x = gen_rtx_IOR (vmode, dest, op0);
19882       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19883     }
19884 }
19885 
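/* Illustrative sketch of the identity used here: with op0 already
   reduced to |op0|, copysign (op0, op1) is computed as

     dest = (op1 & signbit_mask) | |op0|

   i.e. one AND plus, when |op0| is nonzero, one IOR.  */
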
19886 /* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
19887    so we have to do two masks.  */
19888 
19889 void
19890 ix86_split_copysign_var (rtx operands[])
19891 {
19892   machine_mode mode, vmode;
19893   rtx dest, scratch, op0, op1, mask, nmask, x;
19894 
19895   dest = operands[0];
19896   scratch = operands[1];
19897   op0 = operands[2];
19898   op1 = operands[3];
19899   nmask = operands[4];
19900   mask = operands[5];
19901 
19902   mode = GET_MODE (dest);
19903   vmode = GET_MODE (mask);
19904 
19905   if (rtx_equal_p (op0, op1))
19906     {
19907       /* Shouldn't happen often (it's useless, obviously), but when it does
19908 	 we'd generate incorrect code if we continue below.  */
19909       emit_move_insn (dest, op0);
19910       return;
19911     }
19912 
19913   if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
19914     {
19915       gcc_assert (REGNO (op1) == REGNO (scratch));
19916 
19917       x = gen_rtx_AND (vmode, scratch, mask);
19918       emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19919 
19920       dest = mask;
19921       op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19922       x = gen_rtx_NOT (vmode, dest);
19923       x = gen_rtx_AND (vmode, x, op0);
19924       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19925     }
19926   else
19927     {
19928       if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
19929 	{
19930 	  x = gen_rtx_AND (vmode, scratch, mask);
19931 	}
19932       else						/* alternative 2,4 */
19933 	{
19934           gcc_assert (REGNO (mask) == REGNO (scratch));
19935           op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19936 	  x = gen_rtx_AND (vmode, scratch, op1);
19937 	}
19938       emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19939 
19940       if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
19941 	{
19942 	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
19943 	  x = gen_rtx_AND (vmode, dest, nmask);
19944 	}
19945       else						/* alternative 3,4 */
19946 	{
19947           gcc_assert (REGNO (nmask) == REGNO (dest));
19948 	  dest = nmask;
19949 	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19950 	  x = gen_rtx_AND (vmode, dest, op0);
19951 	}
19952       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19953     }
19954 
19955   x = gen_rtx_IOR (vmode, dest, scratch);
19956   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19957 }
19958 
19959 /* Return TRUE or FALSE depending on whether the first SET in INSN
19960    has source and destination with matching CC modes, and whether the
19961    CC mode is at least as constrained as REQ_MODE.  */
19962 
19963 bool
19964 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19965 {
19966   rtx set;
19967   machine_mode set_mode;
19968 
19969   set = PATTERN (insn);
19970   if (GET_CODE (set) == PARALLEL)
19971     set = XVECEXP (set, 0, 0);
19972   gcc_assert (GET_CODE (set) == SET);
19973   gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19974 
19975   set_mode = GET_MODE (SET_DEST (set));
19976   switch (set_mode)
19977     {
19978     case CCNOmode:
19979       if (req_mode != CCNOmode
19980 	  && (req_mode != CCmode
19981 	      || XEXP (SET_SRC (set), 1) != const0_rtx))
19982 	return false;
19983       break;
19984     case CCmode:
19985       if (req_mode == CCGCmode)
19986 	return false;
19987       /* FALLTHRU */
19988     case CCGCmode:
19989       if (req_mode == CCGOCmode || req_mode == CCNOmode)
19990 	return false;
19991       /* FALLTHRU */
19992     case CCGOCmode:
19993       if (req_mode == CCZmode)
19994 	return false;
19995       /* FALLTHRU */
19996     case CCZmode:
19997       break;
19998 
19999     case CCAmode:
20000     case CCCmode:
20001     case CCOmode:
20002     case CCSmode:
20003       if (set_mode != req_mode)
20004 	return false;
20005       break;
20006 
20007     default:
20008       gcc_unreachable ();
20009     }
20010 
20011   return GET_MODE (SET_SRC (set)) == set_mode;
20012 }
20013 
20014 /* Generate insn patterns to do an integer compare of OPERANDS.  */
20015 
20016 static rtx
20017 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
20018 {
20019   machine_mode cmpmode;
20020   rtx tmp, flags;
20021 
20022   cmpmode = SELECT_CC_MODE (code, op0, op1);
20023   flags = gen_rtx_REG (cmpmode, FLAGS_REG);
20024 
20025   /* This is very simple, but making the interface the same as in the
20026      FP case makes the rest of the code easier.  */
20027   tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
20028   emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
20029 
20030   /* Return the test that should be put into the flags user, i.e.
20031      the bcc, scc, or cmov instruction.  */
20032   return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
20033 }
20034 
20035 /* Figure out whether to use ordered or unordered fp comparisons.
20036    Return the appropriate mode to use.  */
20037 
20038 machine_mode
20039 ix86_fp_compare_mode (enum rtx_code)
20040 {
20041   /* ??? In order to make all comparisons reversible, we do all comparisons
20042      non-trapping when compiling for IEEE.  Once gcc is able to distinguish
20043      all forms of trapping and nontrapping comparisons, we can make inequality
20044      comparisons trapping again, since that results in better code when using
20045      FCOM based compares.  */
20046   return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
20047 }
20048 
20049 machine_mode
20050 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
20051 {
20052   machine_mode mode = GET_MODE (op0);
20053 
20054   if (SCALAR_FLOAT_MODE_P (mode))
20055     {
20056       gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20057       return ix86_fp_compare_mode (code);
20058     }
20059 
20060   switch (code)
20061     {
20062       /* Only zero flag is needed.  */
20063     case EQ:			/* ZF=0 */
20064     case NE:			/* ZF!=0 */
20065       return CCZmode;
20066       /* Codes needing carry flag.  */
20067     case GEU:			/* CF=0 */
20068     case LTU:			/* CF=1 */
20069       /* Detect overflow checks.  They need just the carry flag.  */
20070       if (GET_CODE (op0) == PLUS
20071 	  && rtx_equal_p (op1, XEXP (op0, 0)))
20072 	return CCCmode;
20073       else
20074 	return CCmode;
20075     case GTU:			/* CF=0 & ZF=0 */
20076     case LEU:			/* CF=1 | ZF=1 */
20077       return CCmode;
20078       /* Codes possibly doable only with sign flag when
20079          comparing against zero.  */
20080     case GE:			/* SF=OF   or   SF=0 */
20081     case LT:			/* SF<>OF  or   SF=1 */
20082       if (op1 == const0_rtx)
20083 	return CCGOCmode;
20084       else
20085 	/* For other cases Carry flag is not required.  */
20086 	return CCGCmode;
20087       /* Codes doable only with the sign flag when comparing
20088          against zero, but for which we miss the jump instruction,
20089          so we need to use relational tests against overflow,
20090          which thus needs to be zero.  */
20091     case GT:			/* ZF=0 & SF=OF */
20092     case LE:			/* ZF=1 | SF<>OF */
20093       if (op1 == const0_rtx)
20094 	return CCNOmode;
20095       else
20096 	return CCGCmode;
20097       /* The strcmp pattern does (use flags), and combine may ask us
20098 	 for the proper mode.  */
20099     case USE:
20100       return CCmode;
20101     default:
20102       gcc_unreachable ();
20103     }
20104 }
20105 
20106 /* Return the fixed registers used for condition codes.  */
20107 
20108 static bool
20109 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
20110 {
20111   *p1 = FLAGS_REG;
20112   *p2 = FPSR_REG;
20113   return true;
20114 }
20115 
20116 /* If two condition code modes are compatible, return a condition code
20117    mode which is compatible with both.  Otherwise, return
20118    VOIDmode.  */
20119 
20120 static machine_mode
20121 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
20122 {
20123   if (m1 == m2)
20124     return m1;
20125 
20126   if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
20127     return VOIDmode;
20128 
20129   if ((m1 == CCGCmode && m2 == CCGOCmode)
20130       || (m1 == CCGOCmode && m2 == CCGCmode))
20131     return CCGCmode;
20132 
20133   if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
20134     return m2;
20135   else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
20136     return m1;
20137 
20138   switch (m1)
20139     {
20140     default:
20141       gcc_unreachable ();
20142 
20143     case CCmode:
20144     case CCGCmode:
20145     case CCGOCmode:
20146     case CCNOmode:
20147     case CCAmode:
20148     case CCCmode:
20149     case CCOmode:
20150     case CCSmode:
20151     case CCZmode:
20152       switch (m2)
20153 	{
20154 	default:
20155 	  return VOIDmode;
20156 
20157 	case CCmode:
20158 	case CCGCmode:
20159 	case CCGOCmode:
20160 	case CCNOmode:
20161 	case CCAmode:
20162 	case CCCmode:
20163 	case CCOmode:
20164 	case CCSmode:
20165 	case CCZmode:
20166 	  return CCmode;
20167 	}
20168 
20169     case CCFPmode:
20170     case CCFPUmode:
20171       /* These are only compatible with themselves, which we already
20172 	 checked above.  */
20173       return VOIDmode;
20174     }
20175 }
20176 
20177 
20178 /* Return a comparison we can do that is equivalent to
20179    swap_condition (code), apart possibly from orderedness.
20180    But never change orderedness if TARGET_IEEE_FP, returning
20181    UNKNOWN in that case if necessary.  */
20182 
20183 static enum rtx_code
20184 ix86_fp_swap_condition (enum rtx_code code)
20185 {
20186   switch (code)
20187     {
20188     case GT:                   /* GTU - CF=0 & ZF=0 */
20189       return TARGET_IEEE_FP ? UNKNOWN : UNLT;
20190     case GE:                   /* GEU - CF=0 */
20191       return TARGET_IEEE_FP ? UNKNOWN : UNLE;
20192     case UNLT:                 /* LTU - CF=1 */
20193       return TARGET_IEEE_FP ? UNKNOWN : GT;
20194     case UNLE:                 /* LEU - CF=1 | ZF=1 */
20195       return TARGET_IEEE_FP ? UNKNOWN : GE;
20196     default:
20197       return swap_condition (code);
20198     }
20199 }
20200 
20201 /* Return the cost of comparison CODE using the best strategy for performance.
20202    All of the following functions use the number of instructions as a cost metric.
20203    In the future this should be tweaked to compute bytes for optimize_size and
20204    take into account the performance of various instructions on various CPUs.  */
20205 
20206 static int
20207 ix86_fp_comparison_cost (enum rtx_code code)
20208 {
20209   int arith_cost;
20210 
20211   /* The cost of code using bit-twiddling on %ah.  */
20212   switch (code)
20213     {
20214     case UNLE:
20215     case UNLT:
20216     case LTGT:
20217     case GT:
20218     case GE:
20219     case UNORDERED:
20220     case ORDERED:
20221     case UNEQ:
20222       arith_cost = 4;
20223       break;
20224     case LT:
20225     case NE:
20226     case EQ:
20227     case UNGE:
20228       arith_cost = TARGET_IEEE_FP ? 5 : 4;
20229       break;
20230     case LE:
20231     case UNGT:
20232       arith_cost = TARGET_IEEE_FP ? 6 : 4;
20233       break;
20234     default:
20235       gcc_unreachable ();
20236     }
20237 
20238   switch (ix86_fp_comparison_strategy (code))
20239     {
20240     case IX86_FPCMP_COMI:
20241       return arith_cost > 4 ? 3 : 2;
20242     case IX86_FPCMP_SAHF:
20243       return arith_cost > 4 ? 4 : 3;
20244     default:
20245       return arith_cost;
20246     }
20247 }
20248 
20249 /* Return the strategy to use for floating point.  We assume that fcomi is
20250    always preferable where available, since that is also true when looking at size
20251    (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */
20252 
20253 enum ix86_fpcmp_strategy
20254 ix86_fp_comparison_strategy (enum rtx_code)
20255 {
20256   /* Do fcomi/sahf based test when profitable.  */
20257 
20258   if (TARGET_CMOVE)
20259     return IX86_FPCMP_COMI;
20260 
20261   if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20262     return IX86_FPCMP_SAHF;
20263 
20264   return IX86_FPCMP_ARITH;
20265 }
20266 
20267 /* Swap, force into registers, or otherwise massage the two operands
20268    to a fp comparison.  The operands are updated in place; the new
20269    comparison code is returned.  */
20270 
20271 static enum rtx_code
20272 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20273 {
20274   machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20275   rtx op0 = *pop0, op1 = *pop1;
20276   machine_mode op_mode = GET_MODE (op0);
20277   int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20278 
20279   /* All of the unordered compare instructions only work on registers.
20280      The same is true of the fcomi compare instructions.  The XFmode
20281      compare instructions require registers except when comparing
20282      against zero or when converting operand 1 from fixed point to
20283      floating point.  */
20284 
20285   if (!is_sse
20286       && (fpcmp_mode == CCFPUmode
20287 	  || (op_mode == XFmode
20288 	      && ! (standard_80387_constant_p (op0) == 1
20289 		    || standard_80387_constant_p (op1) == 1)
20290 	      && GET_CODE (op1) != FLOAT)
20291 	  || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20292     {
20293       op0 = force_reg (op_mode, op0);
20294       op1 = force_reg (op_mode, op1);
20295     }
20296   else
20297     {
20298       /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
20299 	 things around if they appear profitable, otherwise force op0
20300 	 into a register.  */
20301 
20302       if (standard_80387_constant_p (op0) == 0
20303 	  || (MEM_P (op0)
20304 	      && ! (standard_80387_constant_p (op1) == 0
20305 		    || MEM_P (op1))))
20306 	{
20307 	  enum rtx_code new_code = ix86_fp_swap_condition (code);
20308 	  if (new_code != UNKNOWN)
20309 	    {
20310 	      std::swap (op0, op1);
20311 	      code = new_code;
20312 	    }
20313 	}
20314 
20315       if (!REG_P (op0))
20316 	op0 = force_reg (op_mode, op0);
20317 
20318       if (CONSTANT_P (op1))
20319 	{
20320 	  int tmp = standard_80387_constant_p (op1);
20321 	  if (tmp == 0)
20322 	    op1 = validize_mem (force_const_mem (op_mode, op1));
20323 	  else if (tmp == 1)
20324 	    {
20325 	      if (TARGET_CMOVE)
20326 		op1 = force_reg (op_mode, op1);
20327 	    }
20328 	  else
20329 	    op1 = force_reg (op_mode, op1);
20330 	}
20331     }
20332 
20333   /* Try to rearrange the comparison to make it cheaper.  */
20334   if (ix86_fp_comparison_cost (code)
20335       > ix86_fp_comparison_cost (swap_condition (code))
20336       && (REG_P (op1) || can_create_pseudo_p ()))
20337     {
20338       std::swap (op0, op1);
20339       code = swap_condition (code);
20340       if (!REG_P (op0))
20341 	op0 = force_reg (op_mode, op0);
20342     }
20343 
20344   *pop0 = op0;
20345   *pop1 = op1;
20346   return code;
20347 }
20348 
20349 /* Convert comparison codes we use to represent FP comparison to integer
20350    code that will result in proper branch.  Return UNKNOWN if no such code
20351    is available.  */
20352 
20353 enum rtx_code
20354 ix86_fp_compare_code_to_integer (enum rtx_code code)
20355 {
20356   switch (code)
20357     {
20358     case GT:
20359       return GTU;
20360     case GE:
20361       return GEU;
20362     case ORDERED:
20363     case UNORDERED:
20364       return code;
20366     case UNEQ:
20367       return EQ;
20369     case UNLT:
20370       return LTU;
20372     case UNLE:
20373       return LEU;
20375     case LTGT:
20376       return NE;
20378     default:
20379       return UNKNOWN;
20380     }
20381 }
20382 
20383 /* Generate insn patterns to do a floating point compare of OPERANDS.  */
20384 
20385 static rtx
20386 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20387 {
20388   machine_mode fpcmp_mode, intcmp_mode;
20389   rtx tmp, tmp2;
20390 
20391   fpcmp_mode = ix86_fp_compare_mode (code);
20392   code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20393 
20394   /* Do fcomi/sahf based test when profitable.  */
20395   switch (ix86_fp_comparison_strategy (code))
20396     {
20397     case IX86_FPCMP_COMI:
20398       intcmp_mode = fpcmp_mode;
20399       tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20400       tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20401 			 tmp);
20402       emit_insn (tmp);
20403       break;
20404 
20405     case IX86_FPCMP_SAHF:
20406       intcmp_mode = fpcmp_mode;
20407       tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20408       tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20409 			 tmp);
20410 
20411       if (!scratch)
20412 	scratch = gen_reg_rtx (HImode);
20413       tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20414       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20415       break;
20416 
20417     case IX86_FPCMP_ARITH:
20418       /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
20419       tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20420       tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20421       if (!scratch)
20422 	scratch = gen_reg_rtx (HImode);
20423       emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
20424 
20425       /* In the unordered case, we have to check C2 for NaNs, which
20426 	 doesn't happen to work out to anything nice combination-wise.
20427 	 So do some bit twiddling on the value we've got in AH to come
20428 	 up with an appropriate set of condition codes.  */
20429 
20430       intcmp_mode = CCNOmode;
20431       switch (code)
20432 	{
20433 	case GT:
20434 	case UNGT:
20435 	  if (code == GT || !TARGET_IEEE_FP)
20436 	    {
20437 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20438 	      code = EQ;
20439 	    }
20440 	  else
20441 	    {
20442 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20443 	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20444 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20445 	      intcmp_mode = CCmode;
20446 	      code = GEU;
20447 	    }
20448 	  break;
20449 	case LT:
20450 	case UNLT:
20451 	  if (code == LT && TARGET_IEEE_FP)
20452 	    {
20453 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20454 	      emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20455 	      intcmp_mode = CCmode;
20456 	      code = EQ;
20457 	    }
20458 	  else
20459 	    {
20460 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20461 	      code = NE;
20462 	    }
20463 	  break;
20464 	case GE:
20465 	case UNGE:
20466 	  if (code == GE || !TARGET_IEEE_FP)
20467 	    {
20468 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20469 	      code = EQ;
20470 	    }
20471 	  else
20472 	    {
20473 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20474 	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20475 	      code = NE;
20476 	    }
20477 	  break;
20478 	case LE:
20479 	case UNLE:
20480 	  if (code == LE && TARGET_IEEE_FP)
20481 	    {
20482 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20483 	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20484 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20485 	      intcmp_mode = CCmode;
20486 	      code = LTU;
20487 	    }
20488 	  else
20489 	    {
20490 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20491 	      code = NE;
20492 	    }
20493 	  break;
20494 	case EQ:
20495 	case UNEQ:
20496 	  if (code == EQ && TARGET_IEEE_FP)
20497 	    {
20498 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20499 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20500 	      intcmp_mode = CCmode;
20501 	      code = EQ;
20502 	    }
20503 	  else
20504 	    {
20505 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20506 	      code = NE;
20507 	    }
20508 	  break;
20509 	case NE:
20510 	case LTGT:
20511 	  if (code == NE && TARGET_IEEE_FP)
20512 	    {
20513 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20514 	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20515 					     GEN_INT (0x40)));
20516 	      code = NE;
20517 	    }
20518 	  else
20519 	    {
20520 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20521 	      code = EQ;
20522 	    }
20523 	  break;
20524 
20525 	case UNORDERED:
20526 	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20527 	  code = NE;
20528 	  break;
20529 	case ORDERED:
20530 	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20531 	  code = EQ;
20532 	  break;
20533 
20534 	default:
20535 	  gcc_unreachable ();
20536 	}
20537 	break;
20538 
20539     default:
20540       gcc_unreachable ();
20541     }
20542 
20543   /* Return the test that should be put into the flags user, i.e.
20544      the bcc, scc, or cmov instruction.  */
20545   return gen_rtx_fmt_ee (code, VOIDmode,
20546 			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20547 			 const0_rtx);
20548 }
20549 
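/* A note on the magic constants above (FPU status word layout): after
   fnstsw, AH holds C0 in bit 0 (0x01), C2 in bit 2 (0x04) and C3 in
   bit 6 (0x40), so 0x45 selects all three condition bits.  fcom sets
   C3/C2/C0 to 0/0/0 for >, 0/0/1 for <, 1/0/0 for equality and 1/1/1
   for unordered; e.g. the GT case above tests (AH & 0x45) == 0.  */
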
20550 static rtx
20551 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20552 {
20553   rtx ret;
20554 
20555   if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20556     ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20557 
20558   else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20559     {
20560       gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20561       ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20562     }
20563   else
20564     ret = ix86_expand_int_compare (code, op0, op1);
20565 
20566   return ret;
20567 }
20568 
20569 void
20570 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20571 {
20572   machine_mode mode = GET_MODE (op0);
20573   rtx tmp;
20574 
20575   switch (mode)
20576     {
20577     case SFmode:
20578     case DFmode:
20579     case XFmode:
20580     case QImode:
20581     case HImode:
20582     case SImode:
20583       simple:
20584       tmp = ix86_expand_compare (code, op0, op1);
20585       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20586 				  gen_rtx_LABEL_REF (VOIDmode, label),
20587 				  pc_rtx);
20588       emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20589       return;
20590 
20591     case DImode:
20592       if (TARGET_64BIT)
20593 	goto simple;
20594     case TImode:
20595       /* Expand DImode branch into multiple compare+branch.  */
20596       {
20597 	rtx lo[2], hi[2];
20598 	rtx_code_label *label2;
20599 	enum rtx_code code1, code2, code3;
20600 	machine_mode submode;
20601 
20602 	if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20603 	  {
20604 	    std::swap (op0, op1);
20605 	    code = swap_condition (code);
20606 	  }
20607 
20608 	split_double_mode (mode, &op0, 1, lo+0, hi+0);
20609 	split_double_mode (mode, &op1, 1, lo+1, hi+1);
20610 
20611 	submode = mode == DImode ? SImode : DImode;
20612 
20613 	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20614 	   avoid two branches.  This costs one extra insn, so disable when
20615 	   optimizing for size.  */
20616 
20617 	if ((code == EQ || code == NE)
20618 	    && (!optimize_insn_for_size_p ()
20619 	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
20620 	  {
20621 	    rtx xor0, xor1;
20622 
20623 	    xor1 = hi[0];
20624 	    if (hi[1] != const0_rtx)
20625 	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20626 				   NULL_RTX, 0, OPTAB_WIDEN);
20627 
20628 	    xor0 = lo[0];
20629 	    if (lo[1] != const0_rtx)
20630 	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20631 				   NULL_RTX, 0, OPTAB_WIDEN);
20632 
20633 	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
20634 				NULL_RTX, 0, OPTAB_WIDEN);
20635 
20636 	    ix86_expand_branch (code, tmp, const0_rtx, label);
20637 	    return;
20638 	  }
20639 
20640 	/* Otherwise, if we are doing less-than or greater-or-equal-than,
20641 	   and op1 is a constant whose low word is zero, then we can just
20642 	   examine the high word.  Similarly for a low word of -1 with
20643 	   less-or-equal-than or greater-than.  */
20644 
20645 	if (CONST_INT_P (hi[1]))
20646 	  switch (code)
20647 	    {
20648 	    case LT: case LTU: case GE: case GEU:
20649 	      if (lo[1] == const0_rtx)
20650 		{
20651 		  ix86_expand_branch (code, hi[0], hi[1], label);
20652 		  return;
20653 		}
20654 	      break;
20655 	    case LE: case LEU: case GT: case GTU:
20656 	      if (lo[1] == constm1_rtx)
20657 		{
20658 		  ix86_expand_branch (code, hi[0], hi[1], label);
20659 		  return;
20660 		}
20661 	      break;
20662 	    default:
20663 	      break;
20664 	    }
20665 
20666 	/* Otherwise, we need two or three jumps.  */
20667 
20668 	label2 = gen_label_rtx ();
20669 
20670 	code1 = code;
20671 	code2 = swap_condition (code);
20672 	code3 = unsigned_condition (code);
20673 
20674 	switch (code)
20675 	  {
20676 	  case LT: case GT: case LTU: case GTU:
20677 	    break;
20678 
20679 	  case LE:   code1 = LT;  code2 = GT;  break;
20680 	  case GE:   code1 = GT;  code2 = LT;  break;
20681 	  case LEU:  code1 = LTU; code2 = GTU; break;
20682 	  case GEU:  code1 = GTU; code2 = LTU; break;
20683 
20684 	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
20685 	  case NE:   code2 = UNKNOWN; break;
20686 
20687 	  default:
20688 	    gcc_unreachable ();
20689 	  }
20690 
20691 	/*
20692 	 * a < b =>
20693 	 *    if (hi(a) < hi(b)) goto true;
20694 	 *    if (hi(a) > hi(b)) goto false;
20695 	 *    if (lo(a) < lo(b)) goto true;
20696 	 *  false:
20697 	 */
20698 
20699 	if (code1 != UNKNOWN)
20700 	  ix86_expand_branch (code1, hi[0], hi[1], label);
20701 	if (code2 != UNKNOWN)
20702 	  ix86_expand_branch (code2, hi[0], hi[1], label2);
20703 
20704 	ix86_expand_branch (code3, lo[0], lo[1], label);
20705 
20706 	if (code2 != UNKNOWN)
20707 	  emit_label (label2);
20708 	return;
20709       }
20710 
20711     default:
20712       gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20713       goto simple;
20714     }
20715 }
20716 
20717 /* Split a branch based on a floating point condition.  */
20718 void
20719 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20720 		      rtx target1, rtx target2, rtx tmp)
20721 {
20722   rtx condition;
20723   rtx i;
20724 
20725   if (target2 != pc_rtx)
20726     {
20727       std::swap (target1, target2);
20728       code = reverse_condition_maybe_unordered (code);
20729     }
20730 
20731   condition = ix86_expand_fp_compare (code, op1, op2,
20732 				      tmp);
20733 
20734   i = emit_jump_insn (gen_rtx_SET
20735 		      (VOIDmode, pc_rtx,
20736 		       gen_rtx_IF_THEN_ELSE (VOIDmode,
20737 					     condition, target1, target2)));
20738   if (split_branch_probability >= 0)
20739     add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20740 }
20741 
20742 void
20743 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20744 {
20745   rtx ret;
20746 
20747   gcc_assert (GET_MODE (dest) == QImode);
20748 
20749   ret = ix86_expand_compare (code, op0, op1);
20750   PUT_MODE (ret, QImode);
20751   emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20752 }
20753 
20754 /* Expand a comparison setting or clearing the carry flag.  Return true
20755    when successful, and set *POP to the comparison operation.  */
20756 static bool
20757 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20758 {
20759   machine_mode mode =
20760     GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20761 
20762   /* Do not handle double-mode compares that go through the special path.  */
20763   if (mode == (TARGET_64BIT ? TImode : DImode))
20764     return false;
20765 
20766   if (SCALAR_FLOAT_MODE_P (mode))
20767     {
20768       rtx compare_op;
20769       rtx_insn *compare_seq;
20770 
20771       gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20772 
20773       /* Shortcut:  the following common codes never translate
20774 	 into carry flag compares.  */
20775       if (code == EQ || code == NE || code == UNEQ || code == LTGT
20776 	  || code == ORDERED || code == UNORDERED)
20777 	return false;
20778 
20779       /* These comparisons require the zero flag; swap the operands so they won't.  */
20780       if ((code == GT || code == UNLE || code == LE || code == UNGT)
20781 	  && !TARGET_IEEE_FP)
20782 	{
20783 	  std::swap (op0, op1);
20784 	  code = swap_condition (code);
20785 	}
20786 
20787       /* Try to expand the comparison and verify that we end up with
20788 	 a carry flag based comparison.  This fails only when we decide
20789 	 to expand the comparison using arithmetic, which is not a very
20790 	 common scenario.  */
20791       start_sequence ();
20792       compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20793       compare_seq = get_insns ();
20794       end_sequence ();
20795 
20796       if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20797 	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20798         code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20799       else
20800 	code = GET_CODE (compare_op);
20801 
20802       if (code != LTU && code != GEU)
20803 	return false;
20804 
20805       emit_insn (compare_seq);
20806       *pop = compare_op;
20807       return true;
20808     }
20809 
20810   if (!INTEGRAL_MODE_P (mode))
20811     return false;
20812 
20813   switch (code)
20814     {
20815     case LTU:
20816     case GEU:
20817       break;
20818 
20819     /* Convert a==0 into (unsigned)a<1.  */
20820     case EQ:
20821     case NE:
20822       if (op1 != const0_rtx)
20823 	return false;
20824       op1 = const1_rtx;
20825       code = (code == EQ ? LTU : GEU);
20826       break;
20827 
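      /* Illustrative: rewriting a == 0 as (unsigned) a < 1 means a plain
	 "cmp $1, a" sets the carry flag exactly when a is zero, which is
	 the form the sbb-based sequences below consume.  */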
20828     /* Convert a>b into b<a or a>=b+1.  */
20829     case GTU:
20830     case LEU:
20831       if (CONST_INT_P (op1))
20832 	{
20833 	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20834 	  /* Bail out on overflow.  We could still swap the operands, but
20835 	     that would force loading the constant into a register.  */
20836 	  if (op1 == const0_rtx
20837 	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20838 	    return false;
20839 	  code = (code == GTU ? GEU : LTU);
20840 	}
20841       else
20842 	{
20843 	  std::swap (op0, op1);
20844 	  code = (code == GTU ? LTU : GEU);
20845 	}
20846       break;
20847 
20848     /* Convert a>=0 into (unsigned)a<0x80000000.  */
20849     case LT:
20850     case GE:
20851       if (mode == DImode || op1 != const0_rtx)
20852 	return false;
20853       op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20854       code = (code == LT ? GEU : LTU);
20855       break;
20856     case LE:
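      /* Illustrative: for SImode, a >= 0 holds exactly when
	 (unsigned) a < 0x80000000, since the nonnegative values are
	 0 .. 0x7fffffff; the unsigned compare thus reduces the sign-bit
	 test to a carry-flag test.  */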
20857     case GT:
20858       if (mode == DImode || op1 != constm1_rtx)
20859 	return false;
20860       op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20861       code = (code == LE ? GEU : LTU);
20862       break;
20863 
20864     default:
20865       return false;
20866     }
20867   /* Swapping operands may cause a constant to appear as the first operand.  */
20868   if (!nonimmediate_operand (op0, VOIDmode))
20869     {
20870       if (!can_create_pseudo_p ())
20871 	return false;
20872       op0 = force_reg (mode, op0);
20873     }
20874   *pop = ix86_expand_compare (code, op0, op1);
20875   gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20876   return true;
20877 }
20878 
20879 bool
20880 ix86_expand_int_movcc (rtx operands[])
20881 {
20882   enum rtx_code code = GET_CODE (operands[1]), compare_code;
20883   rtx_insn *compare_seq;
20884   rtx compare_op;
20885   machine_mode mode = GET_MODE (operands[0]);
20886   bool sign_bit_compare_p = false;
20887   rtx op0 = XEXP (operands[1], 0);
20888   rtx op1 = XEXP (operands[1], 1);
20889 
20890   if (GET_MODE (op0) == TImode
20891       || (GET_MODE (op0) == DImode
20892 	  && !TARGET_64BIT))
20893     return false;
20894 
20895   start_sequence ();
20896   compare_op = ix86_expand_compare (code, op0, op1);
20897   compare_seq = get_insns ();
20898   end_sequence ();
20899 
20900   compare_code = GET_CODE (compare_op);
20901 
20902   if ((op1 == const0_rtx && (code == GE || code == LT))
20903       || (op1 == constm1_rtx && (code == GT || code == LE)))
20904     sign_bit_compare_p = true;
20905 
20906   /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20907      HImode insns, we'd be swallowed in word prefix ops.  */
20908 
20909   if ((mode != HImode || TARGET_FAST_PREFIX)
20910       && (mode != (TARGET_64BIT ? TImode : DImode))
20911       && CONST_INT_P (operands[2])
20912       && CONST_INT_P (operands[3]))
20913     {
20914       rtx out = operands[0];
20915       HOST_WIDE_INT ct = INTVAL (operands[2]);
20916       HOST_WIDE_INT cf = INTVAL (operands[3]);
20917       HOST_WIDE_INT diff;
20918 
20919       diff = ct - cf;
20920       /* Sign-bit compares are better done using shifts than by using
20921 	 sbb.  */
20922       if (sign_bit_compare_p
20923 	  || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20924 	{
20925 	  /* Detect overlap between destination and compare sources.  */
20926 	  rtx tmp = out;
20927 
20928           if (!sign_bit_compare_p)
20929 	    {
20930 	      rtx flags;
20931 	      bool fpcmp = false;
20932 
20933 	      compare_code = GET_CODE (compare_op);
20934 
20935 	      flags = XEXP (compare_op, 0);
20936 
20937 	      if (GET_MODE (flags) == CCFPmode
20938 		  || GET_MODE (flags) == CCFPUmode)
20939 		{
20940 		  fpcmp = true;
20941 		  compare_code
20942 		    = ix86_fp_compare_code_to_integer (compare_code);
20943 		}
20944 
20945 	      /* To simplify the rest of the code, restrict to the GEU case.  */
20946 	      if (compare_code == LTU)
20947 		{
20948 		  std::swap (ct, cf);
20949 		  compare_code = reverse_condition (compare_code);
20950 		  code = reverse_condition (code);
20951 		}
20952 	      else
20953 		{
20954 		  if (fpcmp)
20955 		    PUT_CODE (compare_op,
20956 			      reverse_condition_maybe_unordered
20957 			        (GET_CODE (compare_op)));
20958 		  else
20959 		    PUT_CODE (compare_op,
20960 			      reverse_condition (GET_CODE (compare_op)));
20961 		}
20962 	      diff = ct - cf;
20963 
20964 	      if (reg_overlap_mentioned_p (out, op0)
20965 		  || reg_overlap_mentioned_p (out, op1))
20966 		tmp = gen_reg_rtx (mode);
20967 
20968 	      if (mode == DImode)
20969 		emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20970 	      else
20971 		emit_insn (gen_x86_movsicc_0_m1	(gen_lowpart (SImode, tmp),
20972 						 flags, compare_op));
20973 	    }
20974 	  else
20975 	    {
20976 	      if (code == GT || code == GE)
20977 		code = reverse_condition (code);
20978 	      else
20979 		{
20980 		  std::swap (ct, cf);
20981 		  diff = ct - cf;
20982 		}
20983 	      tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20984 	    }
20985 
20986 	  if (diff == 1)
20987 	    {
20988 	      /*
20989 	       * cmpl op0,op1
20990 	       * sbbl dest,dest
20991 	       * [addl dest, ct]
20992 	       *
20993 	       * Size 5 - 8.
20994 	       */
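	      /* Why this works (sketch): "sbb dest,dest" computes
		 dest - dest - CF, i.e. -1 if the carry is set and 0
		 otherwise.  With diff == ct - cf == 1, the optional
		 "add ct" then yields cf (= ct - 1) when the carry was
		 set and ct when it was clear.  */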
20995 	      if (ct)
20996 		tmp = expand_simple_binop (mode, PLUS,
20997 					   tmp, GEN_INT (ct),
20998 					   copy_rtx (tmp), 1, OPTAB_DIRECT);
20999 	    }
21000 	  else if (cf == -1)
21001 	    {
21002 	      /*
21003 	       * cmpl op0,op1
21004 	       * sbbl dest,dest
21005 	       * orl $ct, dest
21006 	       *
21007 	       * Size 8.
21008 	       */
21009 	      tmp = expand_simple_binop (mode, IOR,
21010 					 tmp, GEN_INT (ct),
21011 					 copy_rtx (tmp), 1, OPTAB_DIRECT);
21012 	    }
21013 	  else if (diff == -1 && ct)
21014 	    {
21015 	      /*
21016 	       * cmpl op0,op1
21017 	       * sbbl dest,dest
21018 	       * notl dest
21019 	       * [addl dest, cf]
21020 	       *
21021 	       * Size 8 - 11.
21022 	       */
21023 	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
21024 	      if (cf)
21025 		tmp = expand_simple_binop (mode, PLUS,
21026 					   copy_rtx (tmp), GEN_INT (cf),
21027 					   copy_rtx (tmp), 1, OPTAB_DIRECT);
21028 	    }
21029 	  else
21030 	    {
21031 	      /*
21032 	       * cmpl op0,op1
21033 	       * sbbl dest,dest
21034 	       * [notl dest]
21035 	       * andl cf - ct, dest
21036 	       * [addl dest, ct]
21037 	       *
21038 	       * Size 8 - 11.
21039 	       */
21040 
21041 	      if (cf == 0)
21042 		{
21043 		  cf = ct;
21044 		  ct = 0;
21045 		  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
21046 		}
21047 
21048 	      tmp = expand_simple_binop (mode, AND,
21049 					 copy_rtx (tmp),
21050 					 gen_int_mode (cf - ct, mode),
21051 					 copy_rtx (tmp), 1, OPTAB_DIRECT);
21052 	      if (ct)
21053 		tmp = expand_simple_binop (mode, PLUS,
21054 					   copy_rtx (tmp), GEN_INT (ct),
21055 					   copy_rtx (tmp), 1, OPTAB_DIRECT);
21056 	    }
21057 
21058 	  if (!rtx_equal_p (tmp, out))
21059 	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));
21060 
21061 	  return true;
21062 	}
21063 
21064       if (diff < 0)
21065 	{
21066 	  machine_mode cmp_mode = GET_MODE (op0);
21067 	  enum rtx_code new_code;
21068 
21069 	  if (SCALAR_FLOAT_MODE_P (cmp_mode))
21070 	    {
21071 	      gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21072 
21073 	      /* We may be reversing an unordered compare to a normal compare,
21074 		 which is not valid in general (we may convert a non-trapping
21075 		 condition to a trapping one); however, on i386 we currently
21076 		 emit all comparisons unordered.  */
21077 	      new_code = reverse_condition_maybe_unordered (code);
21078 	    }
21079 	  else
21080 	    new_code = ix86_reverse_condition (code, cmp_mode);
21081 	  if (new_code != UNKNOWN)
21082 	    {
21083 	      std::swap (ct, cf);
21084 	      diff = -diff;
21085 	      code = new_code;
21086 	    }
21087 	}
21088 
21089       compare_code = UNKNOWN;
21090       if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
21091 	  && CONST_INT_P (op1))
21092 	{
21093 	  if (op1 == const0_rtx
21094 	      && (code == LT || code == GE))
21095 	    compare_code = code;
21096 	  else if (op1 == constm1_rtx)
21097 	    {
21098 	      if (code == LE)
21099 		compare_code = LT;
21100 	      else if (code == GT)
21101 		compare_code = GE;
21102 	    }
21103 	}
21104 
21105       /* Optimize dest = (op0 < 0) ? -1 : cf.  */
21106       if (compare_code != UNKNOWN
21107 	  && GET_MODE (op0) == GET_MODE (out)
21108 	  && (cf == -1 || ct == -1))
21109 	{
21110 	  /* If the lea code below could be used, only optimize
21111 	     if it results in a two-insn sequence.  */
21112 
21113 	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
21114 		 || diff == 3 || diff == 5 || diff == 9)
21115 	      || (compare_code == LT && ct == -1)
21116 	      || (compare_code == GE && cf == -1))
21117 	    {
21118 	      /*
21119 	       * notl op1	(if necessary)
21120 	       * sarl $31, op1
21121 	       * orl cf, op1
21122 	       */
21123 	      if (ct != -1)
21124 		{
21125 		  cf = ct;
21126 		  ct = -1;
21127 		  code = reverse_condition (code);
21128 		}
21129 
21130 	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21131 
21132 	      out = expand_simple_binop (mode, IOR,
21133 					 out, GEN_INT (cf),
21134 					 out, 1, OPTAB_DIRECT);
21135 	      if (out != operands[0])
21136 		emit_move_insn (operands[0], out);
21137 
21138 	      return true;
21139 	    }
21140 	}
21141 
21142 
21143       if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
21144 	   || diff == 3 || diff == 5 || diff == 9)
21145 	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
21146 	  && (mode != DImode
21147 	      || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
21148 	{
21149 	  /*
21150 	   * xorl dest,dest
21151 	   * cmpl op1,op2
21152 	   * setcc dest
21153 	   * lea cf(dest*(ct-cf)),dest
21154 	   *
21155 	   * Size 14.
21156 	   *
21157 	   * This also catches the degenerate setcc-only case.
21158 	   */
21159 
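	  /* Illustrative instance: with diff == 5 and cf == 7 this emits
	     roughly "setcc dest; lea 7(dest,dest,4), dest" -- the 0/1
	     setcc result is scaled to 0/5 and offset by 7, producing
	     cf or ct = cf + diff without a branch.  */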
21160 	  rtx tmp;
21161 	  int nops;
21162 
21163 	  out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21164 
21165 	  nops = 0;
21166 	  /* On x86_64 the lea instruction operates on Pmode, so we need
21167 	     the arithmetic done in the proper mode to match.  */
21168 	  if (diff == 1)
21169 	    tmp = copy_rtx (out);
21170 	  else
21171 	    {
21172 	      rtx out1;
21173 	      out1 = copy_rtx (out);
21174 	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
21175 	      nops++;
21176 	      if (diff & 1)
21177 		{
21178 		  tmp = gen_rtx_PLUS (mode, tmp, out1);
21179 		  nops++;
21180 		}
21181 	    }
21182 	  if (cf != 0)
21183 	    {
21184 	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
21185 	      nops++;
21186 	    }
21187 	  if (!rtx_equal_p (tmp, out))
21188 	    {
21189 	      if (nops == 1)
21190 		out = force_operand (tmp, copy_rtx (out));
21191 	      else
21192 		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
21193 	    }
21194 	  if (!rtx_equal_p (out, operands[0]))
21195 	    emit_move_insn (operands[0], copy_rtx (out));
21196 
21197 	  return true;
21198 	}
21199 
21200       /*
21201        * General case:			Jumpful:
21202        *   xorl dest,dest		cmpl op1, op2
21203        *   cmpl op1, op2		movl ct, dest
21204        *   setcc dest			jcc 1f
21205        *   decl dest			movl cf, dest
21206        *   andl (cf-ct),dest		1:
21207        *   addl ct,dest
21208        *
21209        * Size 20.			Size 14.
21210        *
21211        * This is reasonably steep, but branch mispredict costs are
21212        * high on modern CPUs, so consider failing only if optimizing
21213        * for space.
21214        */
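      /* A rough walk-through of the jumpless column: setcc leaves 0/1 in
	 dest, decl turns that into -1/0, the andl with (cf-ct) gives
	 (cf-ct)/0, and the final addl produces cf when the condition was
	 false and ct when it was true.  */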
21215 
21216       if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21217 	  && BRANCH_COST (optimize_insn_for_speed_p (),
21218 		  	  false) >= 2)
21219 	{
21220 	  if (cf == 0)
21221 	    {
21222 	      machine_mode cmp_mode = GET_MODE (op0);
21223 	      enum rtx_code new_code;
21224 
21225 	      if (SCALAR_FLOAT_MODE_P (cmp_mode))
21226 		{
21227 		  gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21228 
21229 		  /* We may be reversing an unordered compare to a normal compare,
21230 		     which is not valid in general (we may convert a non-trapping
21231 		     condition to a trapping one); however, on i386 we currently
21232 		     emit all comparisons unordered.  */
21233 		  new_code = reverse_condition_maybe_unordered (code);
21234 		}
21235 	      else
21236 		{
21237 		  new_code = ix86_reverse_condition (code, cmp_mode);
21238 		  if (compare_code != UNKNOWN && new_code != UNKNOWN)
21239 		    compare_code = reverse_condition (compare_code);
21240 		}
21241 
21242 	      if (new_code != UNKNOWN)
21243 		{
21244 		  cf = ct;
21245 		  ct = 0;
21246 		  code = new_code;
21247 		}
21248 	    }
21249 
21250 	  if (compare_code != UNKNOWN)
21251 	    {
21252 	      /* notl op1	(if needed)
21253 		 sarl $31, op1
21254 		 andl (cf-ct), op1
21255 		 addl ct, op1
21256 
21257 		 For x < 0 (resp. x <= -1) there will be no notl,
21258 		 so if possible swap the constants to get rid of the
21259 		 complement.
21260 		 True/false will be -1/0 while code below (store flag
21261 		 followed by decrement) is 0/-1, so the constants need
21262 		 to be exchanged once more.  */
21263 
21264 	      if (compare_code == GE || !cf)
21265 		{
21266 		  code = reverse_condition (code);
21267 		  compare_code = LT;
21268 		}
21269 	      else
21270 		std::swap (ct, cf);
21271 
21272 	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21273 	    }
21274 	  else
21275 	    {
21276 	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21277 
21278 	      out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21279 					 constm1_rtx,
21280 					 copy_rtx (out), 1, OPTAB_DIRECT);
21281 	    }
21282 
21283 	  out = expand_simple_binop (mode, AND, copy_rtx (out),
21284 				     gen_int_mode (cf - ct, mode),
21285 				     copy_rtx (out), 1, OPTAB_DIRECT);
21286 	  if (ct)
21287 	    out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21288 				       copy_rtx (out), 1, OPTAB_DIRECT);
21289 	  if (!rtx_equal_p (out, operands[0]))
21290 	    emit_move_insn (operands[0], copy_rtx (out));
21291 
21292 	  return true;
21293 	}
21294     }
21295 
21296   if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21297     {
21298       /* Try a few things more with specific constants and a variable.  */
21299 
21300       optab op;
21301       rtx var, orig_out, out, tmp;
21302 
21303       if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21304 	return false;
21305 
21306       /* If one of the two operands is an interesting constant, recurse to
21307 	 load that constant and mask the variable in with a logical operation.  */
21308 
21309       if (CONST_INT_P (operands[2]))
21310 	{
21311 	  var = operands[3];
21312 	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21313 	    operands[3] = constm1_rtx, op = and_optab;
21314 	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21315 	    operands[3] = const0_rtx, op = ior_optab;
21316 	  else
21317 	    return false;
21318 	}
21319       else if (CONST_INT_P (operands[3]))
21320 	{
21321 	  var = operands[2];
21322 	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21323 	    operands[2] = constm1_rtx, op = and_optab;
21324 	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
21325 	    operands[2] = const0_rtx, op = ior_optab;
21326 	  else
21327 	    return false;
21328 	}
21329       else
21330         return false;
21331 
21332       orig_out = operands[0];
21333       tmp = gen_reg_rtx (mode);
21334       operands[0] = tmp;
21335 
21336       /* Recurse to get the constant loaded.  */
21337       if (ix86_expand_int_movcc (operands) == 0)
21338         return false;
21339 
21340       /* Mask in the interesting variable.  */
21341       out = expand_binop (mode, op, var, tmp, orig_out, 0,
21342 			  OPTAB_WIDEN);
21343       if (!rtx_equal_p (out, orig_out))
21344 	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21345 
21346       return true;
21347     }
21348 
21349   /*
21350    * For comparison with above,
21351    *
21352    * movl cf,dest
21353    * movl ct,tmp
21354    * cmpl op1,op2
21355    * cmovcc tmp,dest
21356    *
21357    * Size 15.
21358    */
21359 
21360   if (! nonimmediate_operand (operands[2], mode))
21361     operands[2] = force_reg (mode, operands[2]);
21362   if (! nonimmediate_operand (operands[3], mode))
21363     operands[3] = force_reg (mode, operands[3]);
21364 
21365   if (! register_operand (operands[2], VOIDmode)
21366       && (mode == QImode
21367           || ! register_operand (operands[3], VOIDmode)))
21368     operands[2] = force_reg (mode, operands[2]);
21369 
21370   if (mode == QImode
21371       && ! register_operand (operands[3], VOIDmode))
21372     operands[3] = force_reg (mode, operands[3]);
21373 
21374   emit_insn (compare_seq);
21375   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21376 			  gen_rtx_IF_THEN_ELSE (mode,
21377 						compare_op, operands[2],
21378 						operands[3])));
21379   return true;
21380 }
21381 
21382 /* Swap, force into registers, or otherwise massage the two operands
21383    to an sse comparison with a mask result.  Thus we differ a bit from
21384    ix86_prepare_fp_compare_args which expects to produce a flags result.
21385 
21386    The DEST operand exists to help determine whether to commute commutative
21387    operators.  The POP0/POP1 operands are updated in place.  The new
21388    comparison code is returned, or UNKNOWN if not implementable.  */
21389 
21390 static enum rtx_code
21391 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21392 				  rtx *pop0, rtx *pop1)
21393 {
21394   switch (code)
21395     {
21396     case LTGT:
21397     case UNEQ:
21398       /* AVX supports all the needed comparisons.  */
21399       if (TARGET_AVX)
21400 	break;
21401       /* We have no LTGT as an operator.  We could implement it with
21402 	 NE & ORDERED, but this requires an extra temporary.  It's
21403 	 not clear that it's worth it.  */
21404       return UNKNOWN;
21405 
21406     case LT:
21407     case LE:
21408     case UNGT:
21409     case UNGE:
21410       /* These are supported directly.  */
21411       break;
21412 
21413     case EQ:
21414     case NE:
21415     case UNORDERED:
21416     case ORDERED:
21417       /* AVX has 3-operand comparisons; no need to swap anything.  */
21418       if (TARGET_AVX)
21419 	break;
21420       /* For commutative operators, try to canonicalize the destination
21421 	 operand to be first in the comparison - this helps reload to
21422 	 avoid extra moves.  */
21423       if (!dest || !rtx_equal_p (dest, *pop1))
21424 	break;
21425       /* FALLTHRU */
21426 
21427     case GE:
21428     case GT:
21429     case UNLE:
21430     case UNLT:
21431       /* These are not supported directly before AVX, and furthermore
21432 	 ix86_expand_sse_fp_minmax only optimizes LT/UNGE.  Swap the
21433 	 comparison operands to transform into something that is
21434 	 supported.  */
21435       std::swap (*pop0, *pop1);
21436       code = swap_condition (code);
21437       break;
21438 
21439     default:
21440       gcc_unreachable ();
21441     }
21442 
21443   return code;
21444 }
21445 
21446 /* Detect conditional moves that exactly match min/max operational
21447    semantics.  Note that this is IEEE safe, as long as we don't
21448    interchange the operands.
21449 
21450    Returns FALSE if this conditional move doesn't match a MIN/MAX,
21451    and TRUE if the operation is successful and instructions are emitted.  */
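/* For instance, minss implements "dest = op0 < op1 ? op0 : op1", returning
   the second operand when either input is a NaN; swapping the operands
   would therefore change the result for NaN and signed-zero inputs, which
   is why only the LT/UNGE shapes are recognized here.  */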
21452 
21453 static bool
21454 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21455 			   rtx cmp_op1, rtx if_true, rtx if_false)
21456 {
21457   machine_mode mode;
21458   bool is_min;
21459   rtx tmp;
21460 
21461   if (code == LT)
21462     ;
21463   else if (code == UNGE)
21464     std::swap (if_true, if_false);
21465   else
21466     return false;
21467 
21468   if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21469     is_min = true;
21470   else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21471     is_min = false;
21472   else
21473     return false;
21474 
21475   mode = GET_MODE (dest);
21476 
21477   /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21478      but MODE may be a vector mode and thus not appropriate.  */
21479   if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21480     {
21481       int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21482       rtvec v;
21483 
21484       if_true = force_reg (mode, if_true);
21485       v = gen_rtvec (2, if_true, if_false);
21486       tmp = gen_rtx_UNSPEC (mode, v, u);
21487     }
21488   else
21489     {
21490       code = is_min ? SMIN : SMAX;
21491       tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21492     }
21493 
21494   emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
21495   return true;
21496 }
21497 
21498 /* Expand an sse vector comparison.  Return the register with the result.  */
21499 
21500 static rtx
21501 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21502 		     rtx op_true, rtx op_false)
21503 {
21504   machine_mode mode = GET_MODE (dest);
21505   machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21506 
21507   /* In the general case the comparison result can differ from the operands' type.  */
21508   machine_mode cmp_mode;
21509 
21510   /* In AVX512F the result of comparison is an integer mask.  */
21511   bool maskcmp = false;
21512   rtx x;
21513 
21514   if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21515     {
21516       cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21517       gcc_assert (cmp_mode != BLKmode);
21518 
21519       maskcmp = true;
21520     }
21521   else
21522     cmp_mode = cmp_ops_mode;
21523 
21524 
21525   cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21526   if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21527     cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21528 
21529   if (optimize
21530       || reg_overlap_mentioned_p (dest, op_true)
21531       || reg_overlap_mentioned_p (dest, op_false))
21532     dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21533 
21534   /* Compare patterns for int modes are unspec in AVX512F only.  */
21535   if (maskcmp && (code == GT || code == EQ))
21536     {
21537       rtx (*gen)(rtx, rtx, rtx);
21538 
21539       switch (cmp_ops_mode)
21540 	{
21541 	case V64QImode:
21542 	  gcc_assert (TARGET_AVX512BW);
21543 	  gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
21544 	  break;
21545 	case V32HImode:
21546 	  gcc_assert (TARGET_AVX512BW);
21547 	  gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
21548 	  break;
21549 	case V16SImode:
21550 	  gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21551 	  break;
21552 	case V8DImode:
21553 	  gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21554 	  break;
21555 	default:
21556 	  gen = NULL;
21557 	}
21558 
21559       if (gen)
21560 	{
21561 	  emit_insn (gen (dest, cmp_op0, cmp_op1));
21562 	  return dest;
21563 	}
21564     }
21565   x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21566 
21567   if (cmp_mode != mode && !maskcmp)
21568     {
21569       x = force_reg (cmp_ops_mode, x);
21570       convert_move (dest, x, false);
21571     }
21572   else
21573     emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21574 
21575   return dest;
21576 }
21577 
21578 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21579    operations.  This is used for both scalar and vector conditional moves.  */
21580 
21581 static void
21582 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21583 {
21584   machine_mode mode = GET_MODE (dest);
21585   machine_mode cmpmode = GET_MODE (cmp);
21586 
21587   /* In AVX512F the result of comparison is an integer mask.  */
21588   bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21589 
21590   rtx t2, t3, x;
21591 
21592   if (vector_all_ones_operand (op_true, mode)
21593       && rtx_equal_p (op_false, CONST0_RTX (mode))
21594       && !maskcmp)
21595     {
21596       emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
21597     }
21598   else if (op_false == CONST0_RTX (mode)
21599       && !maskcmp)
21600     {
21601       op_true = force_reg (mode, op_true);
21602       x = gen_rtx_AND (mode, cmp, op_true);
21603       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21604     }
21605   else if (op_true == CONST0_RTX (mode)
21606       && !maskcmp)
21607     {
21608       op_false = force_reg (mode, op_false);
21609       x = gen_rtx_NOT (mode, cmp);
21610       x = gen_rtx_AND (mode, x, op_false);
21611       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21612     }
21613   else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21614       && !maskcmp)
21615     {
21616       op_false = force_reg (mode, op_false);
21617       x = gen_rtx_IOR (mode, cmp, op_false);
21618       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21619     }
21620   else if (TARGET_XOP
21621       && !maskcmp)
21622     {
21623       op_true = force_reg (mode, op_true);
21624 
21625       if (!nonimmediate_operand (op_false, mode))
21626 	op_false = force_reg (mode, op_false);
21627 
21628       emit_insn (gen_rtx_SET (mode, dest,
21629 			      gen_rtx_IF_THEN_ELSE (mode, cmp,
21630 						    op_true,
21631 						    op_false)));
21632     }
21633   else
21634     {
21635       rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21636       rtx d = dest;
21637 
21638       if (!nonimmediate_operand (op_true, mode))
21639 	op_true = force_reg (mode, op_true);
21640 
21641       op_false = force_reg (mode, op_false);
21642 
21643       switch (mode)
21644 	{
21645 	case V4SFmode:
21646 	  if (TARGET_SSE4_1)
21647 	    gen = gen_sse4_1_blendvps;
21648 	  break;
21649 	case V2DFmode:
21650 	  if (TARGET_SSE4_1)
21651 	    gen = gen_sse4_1_blendvpd;
21652 	  break;
21653 	case V16QImode:
21654 	case V8HImode:
21655 	case V4SImode:
21656 	case V2DImode:
21657 	  if (TARGET_SSE4_1)
21658 	    {
21659 	      gen = gen_sse4_1_pblendvb;
21660 	      if (mode != V16QImode)
21661 		d = gen_reg_rtx (V16QImode);
21662 	      op_false = gen_lowpart (V16QImode, op_false);
21663 	      op_true = gen_lowpart (V16QImode, op_true);
21664 	      cmp = gen_lowpart (V16QImode, cmp);
21665 	    }
21666 	  break;
21667 	case V8SFmode:
21668 	  if (TARGET_AVX)
21669 	    gen = gen_avx_blendvps256;
21670 	  break;
21671 	case V4DFmode:
21672 	  if (TARGET_AVX)
21673 	    gen = gen_avx_blendvpd256;
21674 	  break;
21675 	case V32QImode:
21676 	case V16HImode:
21677 	case V8SImode:
21678 	case V4DImode:
21679 	  if (TARGET_AVX2)
21680 	    {
21681 	      gen = gen_avx2_pblendvb;
21682 	      if (mode != V32QImode)
21683 		d = gen_reg_rtx (V32QImode);
21684 	      op_false = gen_lowpart (V32QImode, op_false);
21685 	      op_true = gen_lowpart (V32QImode, op_true);
21686 	      cmp = gen_lowpart (V32QImode, cmp);
21687 	    }
21688 	  break;
21689 
21690 	case V64QImode:
21691 	  gen = gen_avx512bw_blendmv64qi;
21692 	  break;
21693 	case V32HImode:
21694 	  gen = gen_avx512bw_blendmv32hi;
21695 	  break;
21696 	case V16SImode:
21697 	  gen = gen_avx512f_blendmv16si;
21698 	  break;
21699 	case V8DImode:
21700 	  gen = gen_avx512f_blendmv8di;
21701 	  break;
21702 	case V8DFmode:
21703 	  gen = gen_avx512f_blendmv8df;
21704 	  break;
21705 	case V16SFmode:
21706 	  gen = gen_avx512f_blendmv16sf;
21707 	  break;
21708 
21709 	default:
21710 	  break;
21711 	}
21712 
21713       if (gen != NULL)
21714 	{
21715 	  emit_insn (gen (d, op_false, op_true, cmp));
21716 	  if (d != dest)
21717 	    emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21718 	}
21719       else
21720 	{
21721 	  op_true = force_reg (mode, op_true);
21722 
21723 	  t2 = gen_reg_rtx (mode);
21724 	  if (optimize)
21725 	    t3 = gen_reg_rtx (mode);
21726 	  else
21727 	    t3 = dest;
21728 
21729 	  x = gen_rtx_AND (mode, op_true, cmp);
21730 	  emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21731 
21732 	  x = gen_rtx_NOT (mode, cmp);
21733 	  x = gen_rtx_AND (mode, x, op_false);
21734 	  emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21735 
21736 	  x = gen_rtx_IOR (mode, t3, t2);
21737 	  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21738 	}
21739     }
21740 }
21741 
21742 /* Expand a floating-point conditional move.  Return true if successful.  */
21743 
21744 bool
21745 ix86_expand_fp_movcc (rtx operands[])
21746 {
21747   machine_mode mode = GET_MODE (operands[0]);
21748   enum rtx_code code = GET_CODE (operands[1]);
21749   rtx tmp, compare_op;
21750   rtx op0 = XEXP (operands[1], 0);
21751   rtx op1 = XEXP (operands[1], 1);
21752 
21753   if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21754     {
21755       machine_mode cmode;
21756 
21757       /* Since we have no cmove for sse registers, don't force bad register
21758 	 allocation just to gain access to it.  Deny movcc when the
21759 	 comparison mode doesn't match the move mode.  */
21760       cmode = GET_MODE (op0);
21761       if (cmode == VOIDmode)
21762 	cmode = GET_MODE (op1);
21763       if (cmode != mode)
21764 	return false;
21765 
21766       code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21767       if (code == UNKNOWN)
21768 	return false;
21769 
21770       if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21771 				     operands[2], operands[3]))
21772 	return true;
21773 
21774       tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21775 				 operands[2], operands[3]);
21776       ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21777       return true;
21778     }
21779 
21780   if (GET_MODE (op0) == TImode
21781       || (GET_MODE (op0) == DImode
21782 	  && !TARGET_64BIT))
21783     return false;
21784 
21785   /* The floating point conditional move instructions don't directly
21786      support conditions resulting from a signed integer comparison.  */
21787 
21788   compare_op = ix86_expand_compare (code, op0, op1);
21789   if (!fcmov_comparison_operator (compare_op, VOIDmode))
21790     {
21791       tmp = gen_reg_rtx (QImode);
21792       ix86_expand_setcc (tmp, code, op0, op1);
21793 
21794       compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21795     }
21796 
21797   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21798 			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
21799 						operands[2], operands[3])));
21800 
21801   return true;
21802 }
21803 
21804 /* Expand a floating-point vector conditional move; a vcond operation
21805    rather than a movcc operation.  */
21806 
21807 bool
21808 ix86_expand_fp_vcond (rtx operands[])
21809 {
21810   enum rtx_code code = GET_CODE (operands[3]);
21811   rtx cmp;
21812 
21813   code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21814 					   &operands[4], &operands[5]);
21815   if (code == UNKNOWN)
21816     {
21817       rtx temp;
21818       switch (GET_CODE (operands[3]))
21819 	{
21820 	case LTGT:
21821 	  temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21822 				      operands[5], operands[0], operands[0]);
21823 	  cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21824 				     operands[5], operands[1], operands[2]);
21825 	  code = AND;
21826 	  break;
21827 	case UNEQ:
21828 	  temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21829 				      operands[5], operands[0], operands[0]);
21830 	  cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21831 				     operands[5], operands[1], operands[2]);
21832 	  code = IOR;
21833 	  break;
21834 	default:
21835 	  gcc_unreachable ();
21836 	}
21837       cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21838 				 OPTAB_DIRECT);
21839       ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21840       return true;
21841     }
21842 
21843   if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21844 				 operands[5], operands[1], operands[2]))
21845     return true;
21846 
21847   cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21848 			     operands[1], operands[2]);
21849   ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21850   return true;
21851 }
21852 
21853 /* Expand a signed/unsigned integral vector conditional move.  */
21854 
21855 bool
21856 ix86_expand_int_vcond (rtx operands[])
21857 {
21858   machine_mode data_mode = GET_MODE (operands[0]);
21859   machine_mode mode = GET_MODE (operands[4]);
21860   enum rtx_code code = GET_CODE (operands[3]);
21861   bool negate = false;
21862   rtx x, cop0, cop1;
21863 
21864   cop0 = operands[4];
21865   cop1 = operands[5];
21866 
21867   /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21868      and x < 0 ? 1 : 0 into (unsigned) x >> 31.  */
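  /* Illustrative: for V4SImode the whole vcond then collapses to a single
     "psrad $31" (arithmetic shift, yielding -1/0 per element) or
     "psrld $31" (logical shift, yielding 1/0 per element).  */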
21869   if ((code == LT || code == GE)
21870       && data_mode == mode
21871       && cop1 == CONST0_RTX (mode)
21872       && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21873       && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21874       && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21875       && (GET_MODE_SIZE (data_mode) == 16
21876 	  || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21877     {
21878       rtx negop = operands[2 - (code == LT)];
21879       int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21880       if (negop == CONST1_RTX (data_mode))
21881 	{
21882 	  rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21883 					 operands[0], 1, OPTAB_DIRECT);
21884 	  if (res != operands[0])
21885 	    emit_move_insn (operands[0], res);
21886 	  return true;
21887 	}
21888       else if (GET_MODE_INNER (data_mode) != DImode
21889 	       && vector_all_ones_operand (negop, data_mode))
21890 	{
21891 	  rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21892 					 operands[0], 0, OPTAB_DIRECT);
21893 	  if (res != operands[0])
21894 	    emit_move_insn (operands[0], res);
21895 	  return true;
21896 	}
21897     }
21898 
21899   if (!nonimmediate_operand (cop1, mode))
21900     cop1 = force_reg (mode, cop1);
21901   if (!general_operand (operands[1], data_mode))
21902     operands[1] = force_reg (data_mode, operands[1]);
21903   if (!general_operand (operands[2], data_mode))
21904     operands[2] = force_reg (data_mode, operands[2]);
21905 
21906   /* XOP supports all of the comparisons on all 128-bit vector int types.  */
21907   if (TARGET_XOP
21908       && (mode == V16QImode || mode == V8HImode
21909 	  || mode == V4SImode || mode == V2DImode))
21910     ;
21911   else
21912     {
21913       /* Canonicalize the comparison to EQ, GT, GTU.  */
21914       switch (code)
21915 	{
21916 	case EQ:
21917 	case GT:
21918 	case GTU:
21919 	  break;
21920 
21921 	case NE:
21922 	case LE:
21923 	case LEU:
21924 	  code = reverse_condition (code);
21925 	  negate = true;
21926 	  break;
21927 
21928 	case GE:
21929 	case GEU:
21930 	  code = reverse_condition (code);
21931 	  negate = true;
21932 	  /* FALLTHRU */
21933 
21934 	case LT:
21935 	case LTU:
21936 	  std::swap (cop0, cop1);
21937 	  code = swap_condition (code);
21938 	  break;
21939 
21940 	default:
21941 	  gcc_unreachable ();
21942 	}
21943 
21944       /* Only SSE4.1/SSE4.2 supports V2DImode.  */
21945       if (mode == V2DImode)
21946 	{
21947 	  switch (code)
21948 	    {
21949 	    case EQ:
21950 	      /* SSE4.1 supports EQ.  */
21951 	      if (!TARGET_SSE4_1)
21952 		return false;
21953 	      break;
21954 
21955 	    case GT:
21956 	    case GTU:
21957 	      /* SSE4.2 supports GT/GTU.  */
21958 	      if (!TARGET_SSE4_2)
21959 		return false;
21960 	      break;
21961 
21962 	    default:
21963 	      gcc_unreachable ();
21964 	    }
21965 	}
21966 
21967       /* Unsigned parallel compare is not supported by the hardware.
21968 	 Play some tricks to turn this into a signed comparison
21969 	 instead.  */
21970       if (code == GTU)
21971 	{
21972 	  cop0 = force_reg (mode, cop0);
21973 
21974 	  switch (mode)
21975 	    {
21976 	    case V16SImode:
21977 	    case V8DImode:
21978 	    case V8SImode:
21979 	    case V4DImode:
21980 	    case V4SImode:
21981 	    case V2DImode:
21982 		{
21983 		  rtx t1, t2, mask;
21984 		  rtx (*gen_sub3) (rtx, rtx, rtx);
21985 
21986 		  switch (mode)
21987 		    {
21988 		    case V16SImode: gen_sub3 = gen_subv16si3; break;
21989 		    case V8DImode: gen_sub3 = gen_subv8di3; break;
21990 		    case V8SImode: gen_sub3 = gen_subv8si3; break;
21991 		    case V4DImode: gen_sub3 = gen_subv4di3; break;
21992 		    case V4SImode: gen_sub3 = gen_subv4si3; break;
21993 		    case V2DImode: gen_sub3 = gen_subv2di3; break;
21994 		    default:
21995 		      gcc_unreachable ();
21996 		    }
21997 		  /* Subtract (-(INT MAX) - 1) from both operands to make
21998 		     them signed.  */
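		  /* Illustration: for 32-bit elements the mask is
		     0x80000000, and x >u y iff
		     (x - 0x80000000) >s (y - 0x80000000).  E.g.
		     0xffffffff >u 0x00000001 becomes
		     0x7fffffff >s 0x80000001, which is true.  */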
21999 		  mask = ix86_build_signbit_mask (mode, true, false);
22000 		  t1 = gen_reg_rtx (mode);
22001 		  emit_insn (gen_sub3 (t1, cop0, mask));
22002 
22003 		  t2 = gen_reg_rtx (mode);
22004 		  emit_insn (gen_sub3 (t2, cop1, mask));
22005 
22006 		  cop0 = t1;
22007 		  cop1 = t2;
22008 		  code = GT;
22009 		}
22010 	      break;
22011 
22012 	    case V64QImode:
22013 	    case V32HImode:
22014 	    case V32QImode:
22015 	    case V16HImode:
22016 	    case V16QImode:
22017 	    case V8HImode:
22018 	      /* Perform a parallel unsigned saturating subtraction.  */
22019 	      x = gen_reg_rtx (mode);
22020 	      emit_insn (gen_rtx_SET (VOIDmode, x,
22021 				      gen_rtx_US_MINUS (mode, cop0, cop1)));
22022 
22023 	      cop0 = x;
22024 	      cop1 = CONST0_RTX (mode);
22025 	      code = EQ;
22026 	      negate = !negate;
22027 	      break;
22028 
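	      /* This relies on the identity a >u b iff the unsigned
		 saturating difference a -us b is nonzero; the EQ against
		 zero together with the flipped NEGATE then selects the
		 proper arm.  E.g. for bytes, 5 -us 9 == 0, so 5 >u 9 is
		 correctly false.  */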
22029 	    default:
22030 	      gcc_unreachable ();
22031 	    }
22032 	}
22033     }
22034 
22035   /* Allow the comparison to be done in one mode, but the movcc to
22036      happen in another mode.  */
22037   if (data_mode == mode)
22038     {
22039       x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
22040 			       operands[1+negate], operands[2-negate]);
22041     }
22042   else
22043     {
22044       gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
22045       x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
22046 			       operands[1+negate], operands[2-negate]);
22047       if (GET_MODE (x) == mode)
22048 	x = gen_lowpart (data_mode, x);
22049     }
22050 
22051   ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
22052 			 operands[2-negate]);
22053   return true;
22054 }
22055 
22056 /* AVX512F supports 64-byte integer vector operations,
22057    so the longest vector we are faced with is V64QImode.  */
22058 #define MAX_VECT_LEN	64
22059 
22060 struct expand_vec_perm_d
22061 {
22062   rtx target, op0, op1;
22063   unsigned char perm[MAX_VECT_LEN];
22064   machine_mode vmode;
22065   unsigned char nelt;
22066   bool one_operand_p;
22067   bool testing_p;
22068 };
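/* Illustrative contents: reversing a V4SImode vector uses nelt == 4 with
   perm == {3,2,1,0}, while a two-operand permutation indexes the 2*nelt
   element concatenation of op0 and op1, e.g. perm == {0,4,1,5} for an
   interleave of the low halves.  */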
22069 
22070 static bool
22071 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
22072 			      struct expand_vec_perm_d *d)
22073 {
22074   /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
22075      expanders, so the args are either in d, or in op0, op1, etc.  */
22076   machine_mode mode = GET_MODE (d ? d->op0 : op0);
22077   machine_mode maskmode = mode;
22078   rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
22079 
22080   switch (mode)
22081     {
22082     case V8HImode:
22083       if (TARGET_AVX512VL && TARGET_AVX512BW)
22084 	gen = gen_avx512vl_vpermi2varv8hi3;
22085       break;
22086     case V16HImode:
22087       if (TARGET_AVX512VL && TARGET_AVX512BW)
22088 	gen = gen_avx512vl_vpermi2varv16hi3;
22089       break;
22090     case V64QImode:
22091       if (TARGET_AVX512VBMI)
22092 	gen = gen_avx512bw_vpermi2varv64qi3;
22093       break;
22094     case V32HImode:
22095       if (TARGET_AVX512BW)
22096 	gen = gen_avx512bw_vpermi2varv32hi3;
22097       break;
22098     case V4SImode:
22099       if (TARGET_AVX512VL)
22100 	gen = gen_avx512vl_vpermi2varv4si3;
22101       break;
22102     case V8SImode:
22103       if (TARGET_AVX512VL)
22104 	gen = gen_avx512vl_vpermi2varv8si3;
22105       break;
22106     case V16SImode:
22107       if (TARGET_AVX512F)
22108 	gen = gen_avx512f_vpermi2varv16si3;
22109       break;
22110     case V4SFmode:
22111       if (TARGET_AVX512VL)
22112 	{
22113 	  gen = gen_avx512vl_vpermi2varv4sf3;
22114 	  maskmode = V4SImode;
22115 	}
22116       break;
22117     case V8SFmode:
22118       if (TARGET_AVX512VL)
22119 	{
22120 	  gen = gen_avx512vl_vpermi2varv8sf3;
22121 	  maskmode = V8SImode;
22122 	}
22123       break;
22124     case V16SFmode:
22125       if (TARGET_AVX512F)
22126 	{
22127 	  gen = gen_avx512f_vpermi2varv16sf3;
22128 	  maskmode = V16SImode;
22129 	}
22130       break;
22131     case V2DImode:
22132       if (TARGET_AVX512VL)
22133 	gen = gen_avx512vl_vpermi2varv2di3;
22134       break;
22135     case V4DImode:
22136       if (TARGET_AVX512VL)
22137 	gen = gen_avx512vl_vpermi2varv4di3;
22138       break;
22139     case V8DImode:
22140       if (TARGET_AVX512F)
22141 	gen = gen_avx512f_vpermi2varv8di3;
22142       break;
22143     case V2DFmode:
22144       if (TARGET_AVX512VL)
22145 	{
22146 	  gen = gen_avx512vl_vpermi2varv2df3;
22147 	  maskmode = V2DImode;
22148 	}
22149       break;
22150     case V4DFmode:
22151       if (TARGET_AVX512VL)
22152 	{
22153 	  gen = gen_avx512vl_vpermi2varv4df3;
22154 	  maskmode = V4DImode;
22155 	}
22156       break;
22157     case V8DFmode:
22158       if (TARGET_AVX512F)
22159 	{
22160 	  gen = gen_avx512f_vpermi2varv8df3;
22161 	  maskmode = V8DImode;
22162 	}
22163       break;
22164     default:
22165       break;
22166     }
22167 
22168   if (gen == NULL)
22169     return false;
22170 
22171   /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
22172      expanders, so the args are either in d, or in op0, op1, etc.  */
22173   if (d)
22174     {
22175       rtx vec[64];
22176       target = d->target;
22177       op0 = d->op0;
22178       op1 = d->op1;
22179       for (int i = 0; i < d->nelt; ++i)
22180 	vec[i] = GEN_INT (d->perm[i]);
22181       mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
22182     }
22183 
22184   emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
22185   return true;
22186 }
22187 
22188 /* Expand a variable vector permutation.  */
22189 
22190 void
22191 ix86_expand_vec_perm (rtx operands[])
22192 {
22193   rtx target = operands[0];
22194   rtx op0 = operands[1];
22195   rtx op1 = operands[2];
22196   rtx mask = operands[3];
22197   rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
22198   machine_mode mode = GET_MODE (op0);
22199   machine_mode maskmode = GET_MODE (mask);
22200   int w, e, i;
22201   bool one_operand_shuffle = rtx_equal_p (op0, op1);
22202 
22203   /* Number of elements in the vector, and bytes per element.  */
22204   w = GET_MODE_NUNITS (mode);
22205   e = GET_MODE_UNIT_SIZE (mode);
22206   gcc_assert (w <= 64);
22207 
22208   if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
22209     return;
22210 
22211   if (TARGET_AVX2)
22212     {
22213       if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
22214 	{
22215 	  /* Unfortunately, the VPERMQ and VPERMPD instructions only support
22216 	     a constant shuffle operand.  With a tiny bit of effort we can
22217 	     use VPERMD instead.  A re-interpretation stall for V4DFmode is
22218 	     unfortunate but there's no avoiding it.
22219 	     Similarly, for V16HImode we don't have instructions for variable
22220 	     shuffling, while for V32QImode we can, after preparing suitable
22221 	     masks, use vpshufb; vpshufb; vpermq; vpor.  */
22222 
22223 	  if (mode == V16HImode)
22224 	    {
22225 	      maskmode = mode = V32QImode;
22226 	      w = 32;
22227 	      e = 1;
22228 	    }
22229 	  else
22230 	    {
22231 	      maskmode = mode = V8SImode;
22232 	      w = 8;
22233 	      e = 4;
22234 	    }
22235 	  t1 = gen_reg_rtx (maskmode);
22236 
22237 	  /* Replicate the low bits of the V4DImode mask into V8SImode:
22238 	       mask = { A B C D }
22239 	       t1 = { A A B B C C D D }.  */
22240 	  for (i = 0; i < w / 2; ++i)
22241 	    vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
22242 	  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22243 	  vt = force_reg (maskmode, vt);
22244 	  mask = gen_lowpart (maskmode, mask);
22245 	  if (maskmode == V8SImode)
22246 	    emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
22247 	  else
22248 	    emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
22249 
22250 	  /* Multiply the shuffle indices by two.  */
22251 	  t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
22252 				    OPTAB_DIRECT);
22253 
22254 	  /* Add one to the odd shuffle indices:
22255 		t1 = { A*2, A*2+1, B*2, B*2+1, ... }.  */
22256 	  for (i = 0; i < w / 2; ++i)
22257 	    {
22258 	      vec[i * 2] = const0_rtx;
22259 	      vec[i * 2 + 1] = const1_rtx;
22260 	    }
22261 	  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22262 	  vt = validize_mem (force_const_mem (maskmode, vt));
22263 	  t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22264 				    OPTAB_DIRECT);
22265 
22266 	  /* Continue as if V8SImode (resp. V32QImode) was used initially.  */
22267 	  operands[3] = mask = t1;
22268 	  target = gen_reg_rtx (mode);
22269 	  op0 = gen_lowpart (mode, op0);
22270 	  op1 = gen_lowpart (mode, op1);
22271 	}
22272 
22273       switch (mode)
22274 	{
22275 	case V8SImode:
22276 	  /* The VPERMD and VPERMPS instructions already properly ignore
22277 	     the high bits of the shuffle elements.  No need for us to
22278 	     perform an AND ourselves.  */
22279 	  if (one_operand_shuffle)
22280 	    {
22281 	      emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22282 	      if (target != operands[0])
22283 		emit_move_insn (operands[0],
22284 				gen_lowpart (GET_MODE (operands[0]), target));
22285 	    }
22286 	  else
22287 	    {
22288 	      t1 = gen_reg_rtx (V8SImode);
22289 	      t2 = gen_reg_rtx (V8SImode);
22290 	      emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22291 	      emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22292 	      goto merge_two;
22293 	    }
22294 	  return;
22295 
22296 	case V8SFmode:
22297 	  mask = gen_lowpart (V8SImode, mask);
22298 	  if (one_operand_shuffle)
22299 	    emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22300 	  else
22301 	    {
22302 	      t1 = gen_reg_rtx (V8SFmode);
22303 	      t2 = gen_reg_rtx (V8SFmode);
22304 	      emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22305 	      emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22306 	      goto merge_two;
22307 	    }
22308 	  return;
22309 
22310         case V4SImode:
22311 	  /* By combining the two 128-bit input vectors into one 256-bit
22312 	     input vector, we can use VPERMD and VPERMPS for the full
22313 	     two-operand shuffle.  */
22314 	  t1 = gen_reg_rtx (V8SImode);
22315 	  t2 = gen_reg_rtx (V8SImode);
22316 	  emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22317 	  emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22318 	  emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22319 	  emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22320 	  return;
22321 
22322         case V4SFmode:
22323 	  t1 = gen_reg_rtx (V8SFmode);
22324 	  t2 = gen_reg_rtx (V8SImode);
22325 	  mask = gen_lowpart (V4SImode, mask);
22326 	  emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22327 	  emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22328 	  emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22329 	  emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
22330 	  return;
22331 
22332 	case V32QImode:
22333 	  t1 = gen_reg_rtx (V32QImode);
22334 	  t2 = gen_reg_rtx (V32QImode);
22335 	  t3 = gen_reg_rtx (V32QImode);
22336 	  vt2 = GEN_INT (-128);
22337 	  for (i = 0; i < 32; i++)
22338 	    vec[i] = vt2;
22339 	  vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22340 	  vt = force_reg (V32QImode, vt);
22341 	  for (i = 0; i < 32; i++)
22342 	    vec[i] = i < 16 ? vt2 : const0_rtx;
22343 	  vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22344 	  vt2 = force_reg (V32QImode, vt2);
22345 	  /* From mask create two adjusted masks, which contain the same
22346 	     bits as mask in the low 7 bits of each vector element.
22347 	     The first mask will have the most significant bit clear
22348 	     if it requests element from the same 128-bit lane
22349 	     if it requests an element from the same 128-bit lane
22350 	     and the MSB set if it requests an element from the other 128-bit lane.
22351 	     and additionally will have its 128-bit lanes swapped.
22352 	     E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22353 	     t1   { 07 92 9e 09 ... | 17 19 85 1f ... } and
22354 	     t3   { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22355 	     stands for the other 12 bytes.  */
22356 	  /* The bit that says whether an element is from the same lane or the
22357 	     other lane is bit 4, so shift it up by 3 to the MSB position.  */
22358 	  t5 = gen_reg_rtx (V4DImode);
22359 	  emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22360 				    GEN_INT (3)));
22361 	  /* Clear MSB bits from the mask just in case it had them set.  */
22362 	  emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22363 	  /* After this t1 will have MSB set for elements from other lane.  */
22364 	  emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22365 	  /* Clear bits other than MSB.  */
22366 	  emit_insn (gen_andv32qi3 (t1, t1, vt));
22367 	  /* Or in the lower bits from mask into t3.  */
22368 	  emit_insn (gen_iorv32qi3 (t3, t1, t2));
22369 	  /* And invert MSB bits in t1, so MSB is set for elements from the same
22370 	     lane.  */
22371 	  emit_insn (gen_xorv32qi3 (t1, t1, vt));
22372 	  /* Swap 128-bit lanes in t3.  */
22373 	  t6 = gen_reg_rtx (V4DImode);
22374 	  emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22375 					  const2_rtx, GEN_INT (3),
22376 					  const0_rtx, const1_rtx));
22377 	  /* And or in the lower bits from mask into t1.  */
22378 	  emit_insn (gen_iorv32qi3 (t1, t1, t2));
22379 	  if (one_operand_shuffle)
22380 	    {
22381 	      /* Each of these shuffles will put 0s in places where
22382 		 element from the other 128-bit lane is needed, otherwise
22383 		 will shuffle in the requested value.  */
22384 	      emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22385 						gen_lowpart (V32QImode, t6)));
22386 	      emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22387 	      /* For t3 the 128-bit lanes are swapped again.  */
22388 	      t7 = gen_reg_rtx (V4DImode);
22389 	      emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22390 					      const2_rtx, GEN_INT (3),
22391 					      const0_rtx, const1_rtx));
22392 	      /* ORing both together then yields the result.  */
22393 	      emit_insn (gen_iorv32qi3 (target, t1,
22394 					gen_lowpart (V32QImode, t7)));
22395 	      if (target != operands[0])
22396 		emit_move_insn (operands[0],
22397 				gen_lowpart (GET_MODE (operands[0]), target));
22398 	      return;
22399 	    }
22400 
22401 	  t4 = gen_reg_rtx (V32QImode);
22402 	  /* Similar to the one_operand_shuffle code above, just
22403 	     repeated twice, once for each operand.  The merge_two:
22404 	     code will merge the two results together.  */
22405 	  emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22406 					    gen_lowpart (V32QImode, t6)));
22407 	  emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22408 					    gen_lowpart (V32QImode, t6)));
22409 	  emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22410 	  emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22411 	  t7 = gen_reg_rtx (V4DImode);
22412 	  emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22413 					  const2_rtx, GEN_INT (3),
22414 					  const0_rtx, const1_rtx));
22415 	  t8 = gen_reg_rtx (V4DImode);
22416 	  emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22417 					  const2_rtx, GEN_INT (3),
22418 					  const0_rtx, const1_rtx));
22419 	  emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22420 	  emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
22421 	  t1 = t4;
22422 	  t2 = t3;
22423 	  goto merge_two;
22424 
22425 	default:
22426 	  gcc_assert (GET_MODE_SIZE (mode) <= 16);
22427 	  break;
22428 	}
22429     }
22430 
22431   if (TARGET_XOP)
22432     {
22433       /* The XOP VPPERM insn supports three inputs.  By ignoring the
22434 	 one_operand_shuffle special case, we avoid creating another
22435 	 set of constant vectors in memory.  */
22436       one_operand_shuffle = false;
22437 
22438       /* mask = mask & {2*w-1, ...} */
22439       vt = GEN_INT (2*w - 1);
22440     }
22441   else
22442     {
22443       /* mask = mask & {w-1, ...} */
22444       vt = GEN_INT (w - 1);
22445     }
22446 
22447   for (i = 0; i < w; i++)
22448     vec[i] = vt;
22449   vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22450   mask = expand_simple_binop (maskmode, AND, mask, vt,
22451 			      NULL_RTX, 0, OPTAB_DIRECT);
22452 
22453   /* For non-QImode operations, convert the word permutation control
22454      into a byte permutation control.  */
22455   if (mode != V16QImode)
22456     {
22457       mask = expand_simple_binop (maskmode, ASHIFT, mask,
22458 				  GEN_INT (exact_log2 (e)),
22459 				  NULL_RTX, 0, OPTAB_DIRECT);
22460 
22461       /* Convert mask to vector of chars.  */
22462       mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22463 
22464       /* Replicate each of the input bytes into byte positions:
22465 	 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22466 	 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22467 	 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}.  */
22468       for (i = 0; i < 16; ++i)
22469 	vec[i] = GEN_INT (i/e * e);
22470       vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22471       vt = validize_mem (force_const_mem (V16QImode, vt));
22472       if (TARGET_XOP)
22473 	emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22474       else
22475 	emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22476 
22477       /* Convert it into byte positions by adding the byte offset within
22478 	 each element:  mask = mask + {0,1,..,e-1, 0,1,..,e-1, ...}  */
22479       for (i = 0; i < 16; ++i)
22480 	vec[i] = GEN_INT (i % e);
22481       vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22482       vt = validize_mem (force_const_mem (V16QImode, vt));
22483       emit_insn (gen_addv16qi3 (mask, mask, vt));
22484     }
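  /* Illustrative trace for V4SImode (e == 4): a word index of 2 becomes 8
     after the shift; the pshufb with {0,0,0,0, 4,4,4,4, ...} replicates
     it into {8,8,8,8} within its dword, and adding {0,1,2,3, 0,1,2,3, ...}
     yields the byte indices {8,9,10,11}.  */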
22485 
22486   /* The actual shuffle operations all operate on V16QImode.  */
22487   op0 = gen_lowpart (V16QImode, op0);
22488   op1 = gen_lowpart (V16QImode, op1);
22489 
22490   if (TARGET_XOP)
22491     {
22492       if (GET_MODE (target) != V16QImode)
22493 	target = gen_reg_rtx (V16QImode);
22494       emit_insn (gen_xop_pperm (target, op0, op1, mask));
22495       if (target != operands[0])
22496 	emit_move_insn (operands[0],
22497 			gen_lowpart (GET_MODE (operands[0]), target));
22498     }
22499   else if (one_operand_shuffle)
22500     {
22501       if (GET_MODE (target) != V16QImode)
22502 	target = gen_reg_rtx (V16QImode);
22503       emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22504       if (target != operands[0])
22505 	emit_move_insn (operands[0],
22506 			gen_lowpart (GET_MODE (operands[0]), target));
22507     }
22508   else
22509     {
22510       rtx xops[6];
22511       bool ok;
22512 
22513       /* Shuffle the two input vectors independently.  */
22514       t1 = gen_reg_rtx (V16QImode);
22515       t2 = gen_reg_rtx (V16QImode);
22516       emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22517       emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22518 
22519  merge_two:
22520       /* Then merge them together.  The key is whether any given control
22521          element contained a bit set that indicates the second word.  */
22522       mask = operands[3];
22523       vt = GEN_INT (w);
22524       if (maskmode == V2DImode && !TARGET_SSE4_1)
22525 	{
22526 	  /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
22527 	     more shuffle to convert the V2DI input mask into a V4SI
22528 	     input mask, at which point the masking that expand_int_vcond
22529 	     performs will work as desired.  */
22530 	  rtx t3 = gen_reg_rtx (V4SImode);
22531 	  emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22532 				        const0_rtx, const0_rtx,
22533 				        const2_rtx, const2_rtx));
22534 	  mask = t3;
22535 	  maskmode = V4SImode;
22536 	  e = w = 4;
22537 	}
22538 
22539       for (i = 0; i < w; i++)
22540 	vec[i] = vt;
22541       vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22542       vt = force_reg (maskmode, vt);
22543       mask = expand_simple_binop (maskmode, AND, mask, vt,
22544 				  NULL_RTX, 0, OPTAB_DIRECT);
22545 
22546       if (GET_MODE (target) != mode)
22547 	target = gen_reg_rtx (mode);
22548       xops[0] = target;
22549       xops[1] = gen_lowpart (mode, t2);
22550       xops[2] = gen_lowpart (mode, t1);
22551       xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22552       xops[4] = mask;
22553       xops[5] = vt;
22554       ok = ix86_expand_int_vcond (xops);
22555       gcc_assert (ok);
22556       if (target != operands[0])
22557 	emit_move_insn (operands[0],
22558 			gen_lowpart (GET_MODE (operands[0]), target));
22559     }
22560 }
22561 
22562 /* Unpack SRC into DEST, the next wider integer vector type.  UNSIGNED_P is
22563    true if we should do zero extension, else sign extension.  HIGH_P is
22564    true if we want the N/2 high elements, else the low elements.  */
22565 
22566 void
22567 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22568 {
22569   machine_mode imode = GET_MODE (src);
22570   rtx tmp;
22571 
22572   if (TARGET_SSE4_1)
22573     {
22574       rtx (*unpack)(rtx, rtx);
22575       rtx (*extract)(rtx, rtx) = NULL;
22576       machine_mode halfmode = BLKmode;
22577 
22578       switch (imode)
22579 	{
22580 	case V64QImode:
22581 	  if (unsigned_p)
22582 	    unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22583 	  else
22584 	    unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22585 	  halfmode = V32QImode;
22586 	  extract
22587 	    = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22588 	  break;
22589 	case V32QImode:
22590 	  if (unsigned_p)
22591 	    unpack = gen_avx2_zero_extendv16qiv16hi2;
22592 	  else
22593 	    unpack = gen_avx2_sign_extendv16qiv16hi2;
22594 	  halfmode = V16QImode;
22595 	  extract
22596 	    = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22597 	  break;
22598 	case V32HImode:
22599 	  if (unsigned_p)
22600 	    unpack = gen_avx512f_zero_extendv16hiv16si2;
22601 	  else
22602 	    unpack = gen_avx512f_sign_extendv16hiv16si2;
22603 	  halfmode = V16HImode;
22604 	  extract
22605 	    = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22606 	  break;
22607 	case V16HImode:
22608 	  if (unsigned_p)
22609 	    unpack = gen_avx2_zero_extendv8hiv8si2;
22610 	  else
22611 	    unpack = gen_avx2_sign_extendv8hiv8si2;
22612 	  halfmode = V8HImode;
22613 	  extract
22614 	    = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22615 	  break;
22616 	case V16SImode:
22617 	  if (unsigned_p)
22618 	    unpack = gen_avx512f_zero_extendv8siv8di2;
22619 	  else
22620 	    unpack = gen_avx512f_sign_extendv8siv8di2;
22621 	  halfmode = V8SImode;
22622 	  extract
22623 	    = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22624 	  break;
22625 	case V8SImode:
22626 	  if (unsigned_p)
22627 	    unpack = gen_avx2_zero_extendv4siv4di2;
22628 	  else
22629 	    unpack = gen_avx2_sign_extendv4siv4di2;
22630 	  halfmode = V4SImode;
22631 	  extract
22632 	    = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22633 	  break;
22634 	case V16QImode:
22635 	  if (unsigned_p)
22636 	    unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22637 	  else
22638 	    unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22639 	  break;
22640 	case V8HImode:
22641 	  if (unsigned_p)
22642 	    unpack = gen_sse4_1_zero_extendv4hiv4si2;
22643 	  else
22644 	    unpack = gen_sse4_1_sign_extendv4hiv4si2;
22645 	  break;
22646 	case V4SImode:
22647 	  if (unsigned_p)
22648 	    unpack = gen_sse4_1_zero_extendv2siv2di2;
22649 	  else
22650 	    unpack = gen_sse4_1_sign_extendv2siv2di2;
22651 	  break;
22652 	default:
22653 	  gcc_unreachable ();
22654 	}
22655 
22656       if (GET_MODE_SIZE (imode) >= 32)
22657 	{
22658 	  tmp = gen_reg_rtx (halfmode);
22659 	  emit_insn (extract (tmp, src));
22660 	}
22661       else if (high_p)
22662 	{
22663 	  /* Shift the higher 8 bytes into the lower 8 bytes.  */
22664 	  tmp = gen_reg_rtx (V1TImode);
22665 	  emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22666 					 GEN_INT (64)));
22667 	  tmp = gen_lowpart (imode, tmp);
22668 	}
22669       else
22670 	tmp = src;
22671 
22672       emit_insn (unpack (dest, tmp));
22673     }
22674   else
22675     {
22676       rtx (*unpack)(rtx, rtx, rtx);
22677 
22678       switch (imode)
22679 	{
22680 	case V16QImode:
22681 	  if (high_p)
22682 	    unpack = gen_vec_interleave_highv16qi;
22683 	  else
22684 	    unpack = gen_vec_interleave_lowv16qi;
22685 	  break;
22686 	case V8HImode:
22687 	  if (high_p)
22688 	    unpack = gen_vec_interleave_highv8hi;
22689 	  else
22690 	    unpack = gen_vec_interleave_lowv8hi;
22691 	  break;
22692 	case V4SImode:
22693 	  if (high_p)
22694 	    unpack = gen_vec_interleave_highv4si;
22695 	  else
22696 	    unpack = gen_vec_interleave_lowv4si;
22697 	  break;
22698 	default:
22699 	  gcc_unreachable ();
22700 	}
22701 
22702       if (unsigned_p)
22703 	tmp = force_reg (imode, CONST0_RTX (imode));
22704       else
22705 	tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22706 				   src, pc_rtx, pc_rtx);
22707 
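      /* Interleaving SRC with zero (unsigned) or with its per-element
	 sign mask (signed) widens each element.  E.g. for a V4SImode
	 {a,b,c,d} with sign mask {sa,sb,sc,sd}, the high interleave
	 gives {c,sc,d,sd}, which read as V2DImode is the sign-extended
	 pair {c,d}.  */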
22708       rtx tmp2 = gen_reg_rtx (imode);
22709       emit_insn (unpack (tmp2, src, tmp));
22710       emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22711     }
22712 }
22713 
22714 /* Expand conditional increment or decrement using adc/sbb instructions.
22715    The default case using setcc followed by the conditional move can be
22716    done by generic code.  */
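/* For example, "x += (a < b)" with unsigned operands can be emitted,
   schematically, as "cmp a, b ; adc x, 0", and "x -= (a < b)" as
   "cmp a, b ; sbb x, 0".  */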
22717 bool
22718 ix86_expand_int_addcc (rtx operands[])
22719 {
22720   enum rtx_code code = GET_CODE (operands[1]);
22721   rtx flags;
22722   rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22723   rtx compare_op;
22724   rtx val = const0_rtx;
22725   bool fpcmp = false;
22726   machine_mode mode;
22727   rtx op0 = XEXP (operands[1], 0);
22728   rtx op1 = XEXP (operands[1], 1);
22729 
22730   if (operands[3] != const1_rtx
22731       && operands[3] != constm1_rtx)
22732     return false;
22733   if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22734      return false;
22735   code = GET_CODE (compare_op);
22736 
22737   flags = XEXP (compare_op, 0);
22738 
22739   if (GET_MODE (flags) == CCFPmode
22740       || GET_MODE (flags) == CCFPUmode)
22741     {
22742       fpcmp = true;
22743       code = ix86_fp_compare_code_to_integer (code);
22744     }
22745 
22746   if (code != LTU)
22747     {
22748       val = constm1_rtx;
22749       if (fpcmp)
22750 	PUT_CODE (compare_op,
22751 		  reverse_condition_maybe_unordered
22752 		    (GET_CODE (compare_op)));
22753       else
22754 	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22755     }
22756 
22757   mode = GET_MODE (operands[0]);
22758 
22759   /* Construct either adc or sbb insn.  */
22760   if ((code == LTU) == (operands[3] == constm1_rtx))
22761     {
22762       switch (mode)
22763 	{
22764 	  case QImode:
22765 	    insn = gen_subqi3_carry;
22766 	    break;
22767 	  case HImode:
22768 	    insn = gen_subhi3_carry;
22769 	    break;
22770 	  case SImode:
22771 	    insn = gen_subsi3_carry;
22772 	    break;
22773 	  case DImode:
22774 	    insn = gen_subdi3_carry;
22775 	    break;
22776 	  default:
22777 	    gcc_unreachable ();
22778 	}
22779     }
22780   else
22781     {
22782       switch (mode)
22783 	{
22784 	  case QImode:
22785 	    insn = gen_addqi3_carry;
22786 	    break;
22787 	  case HImode:
22788 	    insn = gen_addhi3_carry;
22789 	    break;
22790 	  case SImode:
22791 	    insn = gen_addsi3_carry;
22792 	    break;
22793 	  case DImode:
22794 	    insn = gen_adddi3_carry;
22795 	    break;
22796 	  default:
22797 	    gcc_unreachable ();
22798 	}
22799     }
22800   emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22801 
22802   return true;
22803 }
22804 
22805 
22806 /* Split OPERAND into up to four half-mode parts stored in PARTS.
22807    Similar to split_double_mode, but works for floating-point parameters
22808    and non-offsettable memories.  For pushes, it returns just stack
22809    offsets; the values will be saved in the right order.  */
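/* E.g. a 32-bit DFmode operand splits into two SImode parts, a 32-bit
   XFmode operand into three, a 32-bit TFmode operand into four, and a
   64-bit TFmode operand into two DImode parts.  */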
22810 
22811 static int
22812 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
22813 {
22814   int size;
22815 
22816   if (!TARGET_64BIT)
22817     size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22818   else
22819     size = (GET_MODE_SIZE (mode) + 4) / 8;
22820 
22821   gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22822   gcc_assert (size >= 2 && size <= 4);
22823 
22824   /* Optimize constant pool reference to immediates.  This is used by fp
22825      moves, which force all constants to memory to allow combining.  */
22826   if (MEM_P (operand) && MEM_READONLY_P (operand))
22827     {
22828       rtx tmp = maybe_get_pool_constant (operand);
22829       if (tmp)
22830 	operand = tmp;
22831     }
22832 
22833   if (MEM_P (operand) && !offsettable_memref_p (operand))
22834     {
22835       /* The only non-offsettable memories we handle are pushes.  */
22836       int ok = push_operand (operand, VOIDmode);
22837 
22838       gcc_assert (ok);
22839 
22840       operand = copy_rtx (operand);
22841       PUT_MODE (operand, word_mode);
22842       parts[0] = parts[1] = parts[2] = parts[3] = operand;
22843       return size;
22844     }
22845 
22846   if (GET_CODE (operand) == CONST_VECTOR)
22847     {
22848       machine_mode imode = int_mode_for_mode (mode);
22849       /* Caution: if we looked through a constant pool memory above,
22850 	 the operand may actually have a different mode now.  That's
22851 	 ok, since we want to pun this all the way back to an integer.  */
22852       operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22853       gcc_assert (operand != NULL);
22854       mode = imode;
22855     }
22856 
22857   if (!TARGET_64BIT)
22858     {
22859       if (mode == DImode)
22860 	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22861       else
22862 	{
22863 	  int i;
22864 
22865 	  if (REG_P (operand))
22866 	    {
22867 	      gcc_assert (reload_completed);
22868 	      for (i = 0; i < size; i++)
22869 		parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22870 	    }
22871 	  else if (offsettable_memref_p (operand))
22872 	    {
22873 	      operand = adjust_address (operand, SImode, 0);
22874 	      parts[0] = operand;
22875 	      for (i = 1; i < size; i++)
22876 		parts[i] = adjust_address (operand, SImode, 4 * i);
22877 	    }
22878 	  else if (GET_CODE (operand) == CONST_DOUBLE)
22879 	    {
22880 	      REAL_VALUE_TYPE r;
22881 	      long l[4];
22882 
22883 	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22884 	      switch (mode)
22885 		{
22886 		case TFmode:
22887 		  real_to_target (l, &r, mode);
22888 		  parts[3] = gen_int_mode (l[3], SImode);
22889 		  parts[2] = gen_int_mode (l[2], SImode);
22890 		  break;
22891 		case XFmode:
22892 		  /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22893 		     long double may not be 80-bit.  */
22894 		  real_to_target (l, &r, mode);
22895 		  parts[2] = gen_int_mode (l[2], SImode);
22896 		  break;
22897 		case DFmode:
22898 		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22899 		  break;
22900 		default:
22901 		  gcc_unreachable ();
22902 		}
22903 	      parts[1] = gen_int_mode (l[1], SImode);
22904 	      parts[0] = gen_int_mode (l[0], SImode);
22905 	    }
22906 	  else
22907 	    gcc_unreachable ();
22908 	}
22909     }
22910   else
22911     {
22912       if (mode == TImode)
22913 	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22914       if (mode == XFmode || mode == TFmode)
22915 	{
22916 	  machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22917 	  if (REG_P (operand))
22918 	    {
22919 	      gcc_assert (reload_completed);
22920 	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22921 	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22922 	    }
22923 	  else if (offsettable_memref_p (operand))
22924 	    {
22925 	      operand = adjust_address (operand, DImode, 0);
22926 	      parts[0] = operand;
22927 	      parts[1] = adjust_address (operand, upper_mode, 8);
22928 	    }
22929 	  else if (GET_CODE (operand) == CONST_DOUBLE)
22930 	    {
22931 	      REAL_VALUE_TYPE r;
22932 	      long l[4];
22933 
22934 	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22935 	      real_to_target (l, &r, mode);
22936 
22937 	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
22938 	      if (HOST_BITS_PER_WIDE_INT >= 64)
22939 	        parts[0]
22940 		  = gen_int_mode
22941 		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22942 		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22943 		       DImode);
22944 	      else
22945 	        parts[0] = immed_double_const (l[0], l[1], DImode);
22946 
22947 	      if (upper_mode == SImode)
22948 	        parts[1] = gen_int_mode (l[2], SImode);
22949 	      else if (HOST_BITS_PER_WIDE_INT >= 64)
22950 	        parts[1]
22951 		  = gen_int_mode
22952 		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22953 		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22954 		       DImode);
22955 	      else
22956 	        parts[1] = immed_double_const (l[2], l[3], DImode);
22957 	    }
22958 	  else
22959 	    gcc_unreachable ();
22960 	}
22961     }
22962 
22963   return size;
22964 }
22965 
22966 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22967    All required insns are emitted here.  Operands 2-5 are used as
22968    scratch to hold the destination parts and operands 6-9 the source
22969    parts, in the correct order.  */
22970 
22971 void
22972 ix86_split_long_move (rtx operands[])
22973 {
22974   rtx part[2][4];
22975   int nparts, i, j;
22976   int push = 0;
22977   int collisions = 0;
22978   machine_mode mode = GET_MODE (operands[0]);
22979   bool collisionparts[4];
22980 
22981   /* The DFmode expanders may ask us to move double.
22982      For 64bit target this is single move.  By hiding the fact
22983      here we simplify i386.md splitters.  */
22984   if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22985     {
22986       /* Optimize constant pool reference to immediates.  This is used by
22987 	 fp moves, that force all constants to memory to allow combining.  */
22988 
22989       if (MEM_P (operands[1])
22990 	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22991 	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22992 	operands[1] = get_pool_constant (XEXP (operands[1], 0));
22993       if (push_operand (operands[0], VOIDmode))
22994 	{
22995 	  operands[0] = copy_rtx (operands[0]);
22996 	  PUT_MODE (operands[0], word_mode);
22997 	}
22998       else
22999         operands[0] = gen_lowpart (DImode, operands[0]);
23000       operands[1] = gen_lowpart (DImode, operands[1]);
23001       emit_move_insn (operands[0], operands[1]);
23002       return;
23003     }
23004 
23005   /* The only non-offsettable memory we handle is push.  */
23006   if (push_operand (operands[0], VOIDmode))
23007     push = 1;
23008   else
23009     gcc_assert (!MEM_P (operands[0])
23010 		|| offsettable_memref_p (operands[0]));
23011 
23012   nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
23013   ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
23014 
23015   /* When emitting a push, take care with source operands on the stack.  */
23016   if (push && MEM_P (operands[1])
23017       && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
23018     {
23019       rtx src_base = XEXP (part[1][nparts - 1], 0);
23020 
23021       /* Compensate for the stack decrement by 4.  */
23022       if (!TARGET_64BIT && nparts == 3
23023 	  && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
23024 	src_base = plus_constant (Pmode, src_base, 4);
23025 
23026       /* src_base refers to the stack pointer and is
23027 	 automatically decreased by emitted push.  */
23028       for (i = 0; i < nparts; i++)
23029 	part[1][i] = change_address (part[1][i],
23030 				     GET_MODE (part[1][i]), src_base);
23031     }
23032 
23033   /* We need to do the copy in the right order in case an address register
23034      of the source overlaps the destination.  */
23035   if (REG_P (part[0][0]) && MEM_P (part[1][0]))
23036     {
23037       rtx tmp;
23038 
23039       for (i = 0; i < nparts; i++)
23040 	{
23041 	  collisionparts[i]
23042 	    = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
23043 	  if (collisionparts[i])
23044 	    collisions++;
23045 	}
23046 
23047       /* Collision in the middle part can be handled by reordering.  */
23048       if (collisions == 1 && nparts == 3 && collisionparts [1])
23049 	{
23050 	  std::swap (part[0][1], part[0][2]);
23051 	  std::swap (part[1][1], part[1][2]);
23052 	}
23053       else if (collisions == 1
23054 	       && nparts == 4
23055 	       && (collisionparts [1] || collisionparts [2]))
23056 	{
23057 	  if (collisionparts [1])
23058 	    {
23059 	      std::swap (part[0][1], part[0][2]);
23060 	      std::swap (part[1][1], part[1][2]);
23061 	    }
23062 	  else
23063 	    {
23064 	      std::swap (part[0][2], part[0][3]);
23065 	      std::swap (part[1][2], part[1][3]);
23066 	    }
23067 	}
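
      /* E.g. with nparts == 3 and only the middle destination part
	 colliding with the source address register, the swap above moves
	 the colliding part to the end, so the register is clobbered only
	 by the final move.  */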
23068 
23069       /* If there are more collisions, we can't handle them by reordering.
23070 	 Do an lea to the last part and use only one colliding move.  */
23071       else if (collisions > 1)
23072 	{
23073 	  rtx base, addr, tls_base = NULL_RTX;
23074 
23075 	  collisions = 1;
23076 
23077 	  base = part[0][nparts - 1];
23078 
23079 	  /* Handle the case when the last part isn't valid for lea.
23080 	     Happens in 64-bit mode storing the 12-byte XFmode.  */
23081 	  if (GET_MODE (base) != Pmode)
23082 	    base = gen_rtx_REG (Pmode, REGNO (base));
23083 
23084 	  addr = XEXP (part[1][0], 0);
23085 	  if (TARGET_TLS_DIRECT_SEG_REFS)
23086 	    {
23087 	      struct ix86_address parts;
23088 	      int ok = ix86_decompose_address (addr, &parts);
23089 	      gcc_assert (ok);
23090 	      if (parts.seg == DEFAULT_TLS_SEG_REG)
23091 		{
23092 		  /* It is not valid to use %gs: or %fs: in
23093 		     lea though, so we need to remove it from the
23094 		     address used for lea and add it to each individual
23095 		     memory load instead.  */
23096 		  addr = copy_rtx (addr);
23097 		  rtx *x = &addr;
23098 		  while (GET_CODE (*x) == PLUS)
23099 		    {
23100 		      for (i = 0; i < 2; i++)
23101 			{
23102 			  rtx u = XEXP (*x, i);
23103 			  if (GET_CODE (u) == ZERO_EXTEND)
23104 			    u = XEXP (u, 0);
23105 			  if (GET_CODE (u) == UNSPEC
23106 			      && XINT (u, 1) == UNSPEC_TP)
23107 			    {
23108 			      tls_base = XEXP (*x, i);
23109 			      *x = XEXP (*x, 1 - i);
23110 			      break;
23111 			    }
23112 			}
23113 		      if (tls_base)
23114 			break;
23115 		      x = &XEXP (*x, 0);
23116 		    }
23117 		  gcc_assert (tls_base);
23118 		}
23119 	    }
23120 	  emit_insn (gen_rtx_SET (VOIDmode, base, addr));
23121 	  if (tls_base)
23122 	    base = gen_rtx_PLUS (GET_MODE (base), base, tls_base);
23123 	  part[1][0] = replace_equiv_address (part[1][0], base);
23124 	  for (i = 1; i < nparts; i++)
23125 	    {
23126 	      if (tls_base)
23127 		base = copy_rtx (base);
23128 	      tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
23129 	      part[1][i] = replace_equiv_address (part[1][i], tmp);
23130 	    }
23131 	}
23132     }
23133 
23134   if (push)
23135     {
23136       if (!TARGET_64BIT)
23137 	{
23138 	  if (nparts == 3)
23139 	    {
23140 	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
23141                 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
23142 					  stack_pointer_rtx, GEN_INT (-4)));
23143 	      emit_move_insn (part[0][2], part[1][2]);
23144 	    }
23145 	  else if (nparts == 4)
23146 	    {
23147 	      emit_move_insn (part[0][3], part[1][3]);
23148 	      emit_move_insn (part[0][2], part[1][2]);
23149 	    }
23150 	}
23151       else
23152 	{
23153 	  /* In 64bit mode we don't have a 32bit push available.  If this is
23154 	     a register, it is OK - we will just use the larger counterpart.
23155 	     We also retype memory - this comes from an attempt to avoid the
23156 	     REX prefix on moving the second half of a TFmode value.  */
23157 	  if (GET_MODE (part[1][1]) == SImode)
23158 	    {
23159 	      switch (GET_CODE (part[1][1]))
23160 		{
23161 		case MEM:
23162 		  part[1][1] = adjust_address (part[1][1], DImode, 0);
23163 		  break;
23164 
23165 		case REG:
23166 		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
23167 		  break;
23168 
23169 		default:
23170 		  gcc_unreachable ();
23171 		}
23172 
23173 	      if (GET_MODE (part[1][0]) == SImode)
23174 		part[1][0] = part[1][1];
23175 	    }
23176 	}
23177       emit_move_insn (part[0][1], part[1][1]);
23178       emit_move_insn (part[0][0], part[1][0]);
23179       return;
23180     }
23181 
23182   /* Choose an order that does not overwrite the source before it is copied.  */
23183   if ((REG_P (part[0][0])
23184        && REG_P (part[1][1])
23185        && (REGNO (part[0][0]) == REGNO (part[1][1])
23186 	   || (nparts == 3
23187 	       && REGNO (part[0][0]) == REGNO (part[1][2]))
23188 	   || (nparts == 4
23189 	       && REGNO (part[0][0]) == REGNO (part[1][3]))))
23190       || (collisions > 0
23191 	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
23192     {
23193       for (i = 0, j = nparts - 1; i < nparts; i++, j--)
23194 	{
23195 	  operands[2 + i] = part[0][j];
23196 	  operands[6 + i] = part[1][j];
23197 	}
23198     }
23199   else
23200     {
23201       for (i = 0; i < nparts; i++)
23202 	{
23203 	  operands[2 + i] = part[0][i];
23204 	  operands[6 + i] = part[1][i];
23205 	}
23206     }
23207 
23208   /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
23209   if (optimize_insn_for_size_p ())
23210     {
23211       for (j = 0; j < nparts - 1; j++)
23212 	if (CONST_INT_P (operands[6 + j])
23213 	    && operands[6 + j] != const0_rtx
23214 	    && REG_P (operands[2 + j]))
23215 	  for (i = j; i < nparts - 1; i++)
23216 	    if (CONST_INT_P (operands[7 + i])
23217 		&& INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
23218 	      operands[7 + i] = operands[2 + j];
23219     }
23220 
23221   for (i = 0; i < nparts; i++)
23222     emit_move_insn (operands[2 + i], operands[6 + i]);
23223 
23224   return;
23225 }
23226 
23227 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
23228    left shift by a constant, either using a single shift or
23229    a sequence of add instructions.  */
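/* E.g. on targets where two adds are cheaper than a constant shift,
   "x <<= 2" may be emitted as "add x, x ; add x, x", since x + x is
   the same as x << 1.  */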
23230 
23231 static void
23232 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
23233 {
23234   rtx (*insn)(rtx, rtx, rtx);
23235 
23236   if (count == 1
23237       || (count * ix86_cost->add <= ix86_cost->shift_const
23238 	  && !optimize_insn_for_size_p ()))
23239     {
23240       insn = mode == DImode ? gen_addsi3 : gen_adddi3;
23241       while (count-- > 0)
23242 	emit_insn (insn (operand, operand, operand));
23243     }
23244   else
23245     {
23246       insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23247       emit_insn (insn (operand, operand, GEN_INT (count)));
23248     }
23249 }
23250 
23251 void
23252 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
23253 {
23254   rtx (*gen_ashl3)(rtx, rtx, rtx);
23255   rtx (*gen_shld)(rtx, rtx, rtx);
23256   int half_width = GET_MODE_BITSIZE (mode) >> 1;
23257 
23258   rtx low[2], high[2];
23259   int count;
23260 
23261   if (CONST_INT_P (operands[2]))
23262     {
23263       split_double_mode (mode, operands, 2, low, high);
23264       count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23265 
23266       if (count >= half_width)
23267 	{
23268 	  emit_move_insn (high[0], low[1]);
23269 	  emit_move_insn (low[0], const0_rtx);
23270 
23271 	  if (count > half_width)
23272 	    ix86_expand_ashl_const (high[0], count - half_width, mode);
23273 	}
23274       else
23275 	{
23276 	  gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23277 
23278 	  if (!rtx_equal_p (operands[0], operands[1]))
23279 	    emit_move_insn (operands[0], operands[1]);
23280 
23281 	  emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
23282 	  ix86_expand_ashl_const (low[0], count, mode);
23283 	}
23284       return;
23285     }
23286 
23287   split_double_mode (mode, operands, 1, low, high);
23288 
23289   gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23290 
23291   if (operands[1] == const1_rtx)
23292     {
23293       /* Assuming we've chosen QImode-capable registers, then 1 << N
23294 	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
23295       if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23296 	{
23297 	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
23298 
23299 	  ix86_expand_clear (low[0]);
23300 	  ix86_expand_clear (high[0]);
23301 	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23302 
23303 	  d = gen_lowpart (QImode, low[0]);
23304 	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23305 	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
23306 	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
23307 
23308 	  d = gen_lowpart (QImode, high[0]);
23309 	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23310 	  s = gen_rtx_NE (QImode, flags, const0_rtx);
23311 	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
23312 	}
23313 
23314       /* Otherwise, we can get the same results by manually performing
23315 	 a bit extract operation on bit 5/6, and then performing the two
23316 	 shifts.  The two methods of getting 0/1 into low/high are exactly
23317 	 the same size.  Avoiding the shift in the bit extract case helps
23318 	 pentium4 a bit; no one else seems to care much either way.  */
23319       else
23320 	{
23321 	  machine_mode half_mode;
23322 	  rtx (*gen_lshr3)(rtx, rtx, rtx);
23323 	  rtx (*gen_and3)(rtx, rtx, rtx);
23324 	  rtx (*gen_xor3)(rtx, rtx, rtx);
23325 	  HOST_WIDE_INT bits;
23326 	  rtx x;
23327 
23328 	  if (mode == DImode)
23329 	    {
23330 	      half_mode = SImode;
23331 	      gen_lshr3 = gen_lshrsi3;
23332 	      gen_and3 = gen_andsi3;
23333 	      gen_xor3 = gen_xorsi3;
23334 	      bits = 5;
23335 	    }
23336 	  else
23337 	    {
23338 	      half_mode = DImode;
23339 	      gen_lshr3 = gen_lshrdi3;
23340 	      gen_and3 = gen_anddi3;
23341 	      gen_xor3 = gen_xordi3;
23342 	      bits = 6;
23343 	    }
23344 
23345 	  if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23346 	    x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23347 	  else
23348 	    x = gen_lowpart (half_mode, operands[2]);
23349 	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
23350 
23351 	  emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23352 	  emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23353 	  emit_move_insn (low[0], high[0]);
23354 	  emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23355 	}
23356 
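      /* Both half-mode shifts below rely on x86 masking the shift count
	 modulo 32 (or 64): e.g. for 1 << 40 on a 32-bit target, bit 5 of
	 the count selects high = 1 and low = 0, and the shift by 40 acts
	 as a shift by 40 & 31 == 8, leaving high:low == 2**40.  */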
23357       emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23358       emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23359       return;
23360     }
23361 
23362   if (operands[1] == constm1_rtx)
23363     {
23364       /* For -1 << N, we can avoid the shld instruction, because we
23365 	 know that we're shifting 0...31/63 ones into a -1.  */
23366       emit_move_insn (low[0], constm1_rtx);
23367       if (optimize_insn_for_size_p ())
23368 	emit_move_insn (high[0], low[0]);
23369       else
23370 	emit_move_insn (high[0], constm1_rtx);
23371     }
23372   else
23373     {
23374       gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23375 
23376       if (!rtx_equal_p (operands[0], operands[1]))
23377 	emit_move_insn (operands[0], operands[1]);
23378 
23379       split_double_mode (mode, operands, 1, low, high);
23380       emit_insn (gen_shld (high[0], low[0], operands[2]));
23381     }
23382 
23383   emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23384 
23385   if (TARGET_CMOVE && scratch)
23386     {
23387       rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23388 	= mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23389 
23390       ix86_expand_clear (scratch);
23391       emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23392     }
23393   else
23394     {
23395       rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23396 	= mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23397 
23398       emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
23399     }
23400 }
23401 
23402 void
23403 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23404 {
23405   rtx (*gen_ashr3)(rtx, rtx, rtx)
23406     = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23407   rtx (*gen_shrd)(rtx, rtx, rtx);
23408   int half_width = GET_MODE_BITSIZE (mode) >> 1;
23409 
23410   rtx low[2], high[2];
23411   int count;
23412 
23413   if (CONST_INT_P (operands[2]))
23414     {
23415       split_double_mode (mode, operands, 2, low, high);
23416       count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23417 
23418       if (count == GET_MODE_BITSIZE (mode) - 1)
23419 	{
23420 	  emit_move_insn (high[0], high[1]);
23421 	  emit_insn (gen_ashr3 (high[0], high[0],
23422 				GEN_INT (half_width - 1)));
23423 	  emit_move_insn (low[0], high[0]);
23424 
23425 	}
23426       else if (count >= half_width)
23427 	{
23428 	  emit_move_insn (low[0], high[1]);
23429 	  emit_move_insn (high[0], low[0]);
23430 	  emit_insn (gen_ashr3 (high[0], high[0],
23431 				GEN_INT (half_width - 1)));
23432 
23433 	  if (count > half_width)
23434 	    emit_insn (gen_ashr3 (low[0], low[0],
23435 				  GEN_INT (count - half_width)));
23436 	}
23437       else
23438 	{
23439 	  gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23440 
23441 	  if (!rtx_equal_p (operands[0], operands[1]))
23442 	    emit_move_insn (operands[0], operands[1]);
23443 
23444 	  emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23445 	  emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23446 	}
23447     }
23448   else
23449     {
23450       gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23451 
23452       if (!rtx_equal_p (operands[0], operands[1]))
23453 	emit_move_insn (operands[0], operands[1]);
23454 
23455       split_double_mode (mode, operands, 1, low, high);
23456 
23457       emit_insn (gen_shrd (low[0], high[0], operands[2]));
23458       emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23459 
23460       if (TARGET_CMOVE && scratch)
23461 	{
23462 	  rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23463 	    = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23464 
23465 	  emit_move_insn (scratch, high[0]);
23466 	  emit_insn (gen_ashr3 (scratch, scratch,
23467 				GEN_INT (half_width - 1)));
23468 	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23469 					  scratch));
23470 	}
23471       else
23472 	{
23473 	  rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23474 	    = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23475 
23476 	  emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
23477 	}
23478     }
23479 }
23480 
23481 void
23482 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23483 {
23484   rtx (*gen_lshr3)(rtx, rtx, rtx)
23485     = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23486   rtx (*gen_shrd)(rtx, rtx, rtx);
23487   int half_width = GET_MODE_BITSIZE (mode) >> 1;
23488 
23489   rtx low[2], high[2];
23490   int count;
23491 
23492   if (CONST_INT_P (operands[2]))
23493     {
23494       split_double_mode (mode, operands, 2, low, high);
23495       count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23496 
23497       if (count >= half_width)
23498 	{
23499 	  emit_move_insn (low[0], high[1]);
23500 	  ix86_expand_clear (high[0]);
23501 
23502 	  if (count > half_width)
23503 	    emit_insn (gen_lshr3 (low[0], low[0],
23504 				  GEN_INT (count - half_width)));
23505 	}
23506       else
23507 	{
23508 	  gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23509 
23510 	  if (!rtx_equal_p (operands[0], operands[1]))
23511 	    emit_move_insn (operands[0], operands[1]);
23512 
23513 	  emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23514 	  emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23515 	}
23516     }
23517   else
23518     {
23519       gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23520 
23521       if (!rtx_equal_p (operands[0], operands[1]))
23522 	emit_move_insn (operands[0], operands[1]);
23523 
23524       split_double_mode (mode, operands, 1, low, high);
23525 
23526       emit_insn (gen_shrd (low[0], high[0], operands[2]));
23527       emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23528 
23529       if (TARGET_CMOVE && scratch)
23530 	{
23531 	  rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23532 	    = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23533 
23534 	  ix86_expand_clear (scratch);
23535 	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23536 					  scratch));
23537 	}
23538       else
23539 	{
23540 	  rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23541 	    = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23542 
23543 	  emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23544 	}
23545     }
23546 }
23547 
23548 /* Predict the just-emitted jump instruction to be taken with probability PROB.  */
23549 static void
23550 predict_jump (int prob)
23551 {
23552   rtx insn = get_last_insn ();
23553   gcc_assert (JUMP_P (insn));
23554   add_int_reg_note (insn, REG_BR_PROB, prob);
23555 }
23556 
23557 /* Helper function for the string operations below.  Test whether VARIABLE
23558    is aligned to VALUE bytes.  If so, jump to the returned label.  */
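/* E.g. ix86_expand_aligntest (destptr, 4, false) emits, roughly,
   "test $4, destptr" and a branch to the returned label that is taken
   when that bit is clear.  */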
23559 static rtx_code_label *
23560 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23561 {
23562   rtx_code_label *label = gen_label_rtx ();
23563   rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23564   if (GET_MODE (variable) == DImode)
23565     emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23566   else
23567     emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23568   emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23569 			   1, label);
23570   if (epilogue)
23571     predict_jump (REG_BR_PROB_BASE * 50 / 100);
23572   else
23573     predict_jump (REG_BR_PROB_BASE * 90 / 100);
23574   return label;
23575 }
23576 
23577 /* Decrease COUNTREG by VALUE.  */
23578 static void
23579 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23580 {
23581   rtx (*gen_add)(rtx, rtx, rtx)
23582     = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23583 
23584   emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23585 }
23586 
23587 /* Zero extend the possibly-SImode EXP to a Pmode register.  */
23588 rtx
23589 ix86_zero_extend_to_Pmode (rtx exp)
23590 {
23591   return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23592 }
23593 
23594 /* Divide COUNTREG by SCALE.  */
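/* E.g. a constant count of 37 with SCALE == 4 folds to the constant 9,
   while a register count is shifted right by log2 (SCALE).  */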
23595 static rtx
23596 scale_counter (rtx countreg, int scale)
23597 {
23598   rtx sc;
23599 
23600   if (scale == 1)
23601     return countreg;
23602   if (CONST_INT_P (countreg))
23603     return GEN_INT (INTVAL (countreg) / scale);
23604   gcc_assert (REG_P (countreg));
23605 
23606   sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23607 			    GEN_INT (exact_log2 (scale)),
23608 			    NULL, 1, OPTAB_DIRECT);
23609   return sc;
23610 }
23611 
23612 /* Return mode for the memcpy/memset loop counter.  Prefer SImode over
23613    DImode for constant loop counts.  */
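/* E.g. a constant count of 0x100000000 on a 64-bit target needs DImode,
   while any count that fits in 32 bits uses SImode.  */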
23614 
23615 static machine_mode
23616 counter_mode (rtx count_exp)
23617 {
23618   if (GET_MODE (count_exp) != VOIDmode)
23619     return GET_MODE (count_exp);
23620   if (!CONST_INT_P (count_exp))
23621     return Pmode;
23622   if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23623     return DImode;
23624   return SImode;
23625 }
23626 
23627 /* Copy the address to a Pmode register.  This is used for x32 to
23628    truncate a DImode TLS address to an SImode register.  */
23629 
23630 static rtx
23631 ix86_copy_addr_to_reg (rtx addr)
23632 {
23633   rtx reg;
23634   if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23635     {
23636       reg = copy_addr_to_reg (addr);
23637       REG_POINTER (reg) = 1;
23638       return reg;
23639     }
23640   else
23641     {
23642       gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23643       reg = copy_to_mode_reg (DImode, addr);
23644       REG_POINTER (reg) = 1;
23645       return gen_rtx_SUBREG (SImode, reg, 0);
23646     }
23647 }
23648 
23649 /* When ISSETMEM is FALSE, output a simple loop to move memory from SRCPTR
23650    to DESTPTR via chunks of MODE unrolled UNROLL times; the overall size
23651    is COUNT, specified in bytes.  When ISSETMEM is TRUE, output the
23652    equivalent loop to set memory to VALUE (supposed to be in MODE).
23653 
23654    The size is rounded down to a whole number of chunks moved at once.
23655    SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info.  */
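/* Schematically, the emitted copy loop is:

     size = count & -(UNROLL * chunk_size);
     iter = 0;
     do
       {
	 copy UNROLL chunks of MODE at SRCPTR + iter to DESTPTR + iter;
	 iter += UNROLL * chunk_size;
       }
     while (iter < size);
     destptr += iter; srcptr += iter;

   where chunk_size == GET_MODE_SIZE (MODE).  */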
23656 
23657 
23658 static void
23659 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23660 			       rtx destptr, rtx srcptr, rtx value,
23661 			       rtx count, machine_mode mode, int unroll,
23662 			       int expected_size, bool issetmem)
23663 {
23664   rtx_code_label *out_label, *top_label;
23665   rtx iter, tmp;
23666   machine_mode iter_mode = counter_mode (count);
23667   int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23668   rtx piece_size = GEN_INT (piece_size_n);
23669   rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23670   rtx size;
23671   int i;
23672 
23673   top_label = gen_label_rtx ();
23674   out_label = gen_label_rtx ();
23675   iter = gen_reg_rtx (iter_mode);
23676 
23677   size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23678 			      NULL, 1, OPTAB_DIRECT);
23679   /* Those two should combine.  */
23680   if (piece_size == const1_rtx)
23681     {
23682       emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23683 			       true, out_label);
23684       predict_jump (REG_BR_PROB_BASE * 10 / 100);
23685     }
23686   emit_move_insn (iter, const0_rtx);
23687 
23688   emit_label (top_label);
23689 
23690   tmp = convert_modes (Pmode, iter_mode, iter, true);
23691 
23692   /* This assert could be relaxed - in that case we'd need to compute
23693      the largest power of two dividing PIECE_SIZE_N and pass it to
23694      offset_address.  */
23695   gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23696   destmem = offset_address (destmem, tmp, piece_size_n);
23697   destmem = adjust_address (destmem, mode, 0);
23698 
23699   if (!issetmem)
23700     {
23701       srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23702       srcmem = adjust_address (srcmem, mode, 0);
23703 
23704       /* When unrolling for chips that reorder memory reads and writes,
23705 	 we can save registers by using a single temporary.
23706 	 Using 4 temporaries is also overkill in 32bit mode.  */
23707       if (!TARGET_64BIT && 0)
23708 	{
23709 	  for (i = 0; i < unroll; i++)
23710 	    {
23711 	      if (i)
23712 		{
23713 		  destmem =
23714 		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23715 		  srcmem =
23716 		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23717 		}
23718 	      emit_move_insn (destmem, srcmem);
23719 	    }
23720 	}
23721       else
23722 	{
23723 	  rtx tmpreg[4];
23724 	  gcc_assert (unroll <= 4);
23725 	  for (i = 0; i < unroll; i++)
23726 	    {
23727 	      tmpreg[i] = gen_reg_rtx (mode);
23728 	      if (i)
23729 		{
23730 		  srcmem =
23731 		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23732 		}
23733 	      emit_move_insn (tmpreg[i], srcmem);
23734 	    }
23735 	  for (i = 0; i < unroll; i++)
23736 	    {
23737 	      if (i)
23738 		{
23739 		  destmem =
23740 		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23741 		}
23742 	      emit_move_insn (destmem, tmpreg[i]);
23743 	    }
23744 	}
23745     }
23746   else
23747     for (i = 0; i < unroll; i++)
23748       {
23749 	if (i)
23750 	  destmem =
23751 	    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23752 	emit_move_insn (destmem, value);
23753       }
23754 
23755   tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23756 			     true, OPTAB_LIB_WIDEN);
23757   if (tmp != iter)
23758     emit_move_insn (iter, tmp);
23759 
23760   emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23761 			   true, top_label);
23762   if (expected_size != -1)
23763     {
23764       expected_size /= GET_MODE_SIZE (mode) * unroll;
23765       if (expected_size == 0)
23766 	predict_jump (0);
23767       else if (expected_size > REG_BR_PROB_BASE)
23768 	predict_jump (REG_BR_PROB_BASE - 1);
23769       else
23770         predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23771     }
23772   else
23773     predict_jump (REG_BR_PROB_BASE * 80 / 100);
23774   iter = ix86_zero_extend_to_Pmode (iter);
23775   tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23776 			     true, OPTAB_LIB_WIDEN);
23777   if (tmp != destptr)
23778     emit_move_insn (destptr, tmp);
23779   if (!issetmem)
23780     {
23781       tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23782 				 true, OPTAB_LIB_WIDEN);
23783       if (tmp != srcptr)
23784 	emit_move_insn (srcptr, tmp);
23785     }
23786   emit_label (out_label);
23787 }
23788 
23789 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23790    When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23791    When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23792    For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23793    ORIG_VALUE is the original value passed to memset to fill the memory with.
23794    Other arguments have the same meaning as for the previous function.  */
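/* E.g. clearing 32 bytes promotes QImode to SImode, so the emitted
   sequence is roughly "mov $8, %ecx ; rep stosl" rather than a
   32-count "rep stosb".  */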
23795 
23796 static void
23797 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23798 			   rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23799 			   rtx count,
23800 			   machine_mode mode, bool issetmem)
23801 {
23802   rtx destexp;
23803   rtx srcexp;
23804   rtx countreg;
23805   HOST_WIDE_INT rounded_count;
23806 
23807   /* If possible, it is shorter to use rep movs.
23808      TODO: Maybe it is better to move this logic to decide_alg.  */
23809   if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23810       && (!issetmem || orig_value == const0_rtx))
23811     mode = SImode;
23812 
23813   if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23814     destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23815 
23816   countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23817 						       GET_MODE_SIZE (mode)));
23818   if (mode != QImode)
23819     {
23820       destexp = gen_rtx_ASHIFT (Pmode, countreg,
23821 				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23822       destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23823     }
23824   else
23825     destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23826   if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23827     {
23828       rounded_count = (INTVAL (count)
23829 		       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23830       destmem = shallow_copy_rtx (destmem);
23831       set_mem_size (destmem, rounded_count);
23832     }
23833   else if (MEM_SIZE_KNOWN_P (destmem))
23834     clear_mem_size (destmem);
23835 
23836   if (issetmem)
23837     {
23838       value = force_reg (mode, gen_lowpart (mode, value));
23839       emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23840     }
23841   else
23842     {
23843       if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23844 	srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23845       if (mode != QImode)
23846 	{
23847 	  srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23848 				   GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23849 	  srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23850 	}
23851       else
23852 	srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23853       if (CONST_INT_P (count))
23854 	{
23855 	  rounded_count = (INTVAL (count)
23856 			   & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23857 	  srcmem = shallow_copy_rtx (srcmem);
23858 	  set_mem_size (srcmem, rounded_count);
23859 	}
23860       else
23861 	{
23862 	  if (MEM_SIZE_KNOWN_P (srcmem))
23863 	    clear_mem_size (srcmem);
23864 	}
23865       emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23866 			      destexp, srcexp));
23867     }
23868 }
23869 
23870 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23871    DESTMEM.
23872    SRCMEM is passed by pointer to be updated on return.
23873    The return value is the updated DESTMEM.  */
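/* E.g. on a 64-bit SSE target, a 16-byte SIZE_TO_MOVE becomes a single
   V2DImode load and store through a temporary register, with both
   pointers advanced by 16.  */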
23874 static rtx
23875 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23876 	     HOST_WIDE_INT size_to_move)
23877 {
23878   rtx dst = destmem, src = *srcmem, adjust, tempreg;
23879   enum insn_code code;
23880   machine_mode move_mode;
23881   int piece_size, i;
23882 
23883   /* Find the widest mode in which we could perform moves.
23884      Start with the biggest power of 2 not exceeding SIZE_TO_MOVE and
23885      halve it until a move of that size is supported.  */
23886   piece_size = 1 << floor_log2 (size_to_move);
23887   move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23888   code = optab_handler (mov_optab, move_mode);
23889   while (code == CODE_FOR_nothing && piece_size > 1)
23890     {
23891       piece_size >>= 1;
23892       move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23893       code = optab_handler (mov_optab, move_mode);
23894     }
23895 
23896   /* Find the corresponding vector mode with the same size as MOVE_MODE.
23897      MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.).  */
23898   if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23899     {
23900       int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23901       move_mode = mode_for_vector (word_mode, nunits);
23902       code = optab_handler (mov_optab, move_mode);
23903       if (code == CODE_FOR_nothing)
23904 	{
23905 	  move_mode = word_mode;
23906 	  piece_size = GET_MODE_SIZE (move_mode);
23907 	  code = optab_handler (mov_optab, move_mode);
23908 	}
23909     }
23910   gcc_assert (code != CODE_FOR_nothing);
23911 
23912   dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23913   src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23914 
23915   /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZE moves.  */
23916   gcc_assert (size_to_move % piece_size == 0);
23917   adjust = GEN_INT (piece_size);
23918   for (i = 0; i < size_to_move; i += piece_size)
23919     {
23920       /* We move from memory to memory, so we'll need to do it via
23921 	 a temporary register.  */
23922       tempreg = gen_reg_rtx (move_mode);
23923       emit_insn (GEN_FCN (code) (tempreg, src));
23924       emit_insn (GEN_FCN (code) (dst, tempreg));
23925 
23926       emit_move_insn (destptr,
23927 		      gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23928       emit_move_insn (srcptr,
23929 		      gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23930 
23931       dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23932 					  piece_size);
23933       src = adjust_automodify_address_nv (src, move_mode, srcptr,
23934 					  piece_size);
23935     }
23936 
23937   /* Update DST and SRC rtx.  */
23938   *srcmem = src;
23939   return dst;
23940 }
23941 
23942 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
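/* E.g. a constant COUNT of 23 with MAX_SIZE == 16 leaves an epilogue of
   23 % 16 == 7 bytes, emitted as moves of 4, 2 and 1 bytes.  */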
23943 static void
23944 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23945 			rtx destptr, rtx srcptr, rtx count, int max_size)
23946 {
23947   rtx src, dest;
23948   if (CONST_INT_P (count))
23949     {
23950       HOST_WIDE_INT countval = INTVAL (count);
23951       HOST_WIDE_INT epilogue_size = countval % max_size;
23952       int i;
23953 
23954       /* For now MAX_SIZE should be a power of 2.  This assert could be
23955 	 relaxed, but it would require slightly more complicated epilogue
23956 	 expansion.  */
23957       gcc_assert ((max_size & (max_size - 1)) == 0);
23958       for (i = max_size; i >= 1; i >>= 1)
23959 	{
23960 	  if (epilogue_size & i)
23961 	    destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23962 	}
23963       return;
23964     }
23965   if (max_size > 8)
23966     {
23967       count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23968 				    count, 1, OPTAB_DIRECT);
23969       expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23970 				     count, QImode, 1, 4, false);
23971       return;
23972     }
23973 
23974   /* When single string operations are available, we can cheaply advance
23975      the dest and src pointers.  Otherwise we save code size by maintaining
23976      an offset (zero is readily available from the preceding rep operation)
23977      and using x86 addressing modes.  */
23978   if (TARGET_SINGLE_STRINGOP)
23979     {
23980       if (max_size > 4)
23981 	{
23982 	  rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23983 	  src = change_address (srcmem, SImode, srcptr);
23984 	  dest = change_address (destmem, SImode, destptr);
23985 	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
23986 	  emit_label (label);
23987 	  LABEL_NUSES (label) = 1;
23988 	}
23989       if (max_size > 2)
23990 	{
23991 	  rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23992 	  src = change_address (srcmem, HImode, srcptr);
23993 	  dest = change_address (destmem, HImode, destptr);
23994 	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
23995 	  emit_label (label);
23996 	  LABEL_NUSES (label) = 1;
23997 	}
23998       if (max_size > 1)
23999 	{
24000 	  rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
24001 	  src = change_address (srcmem, QImode, srcptr);
24002 	  dest = change_address (destmem, QImode, destptr);
24003 	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
24004 	  emit_label (label);
24005 	  LABEL_NUSES (label) = 1;
24006 	}
24007     }
24008   else
24009     {
24010       rtx offset = force_reg (Pmode, const0_rtx);
24011       rtx tmp;
24012 
24013       if (max_size > 4)
24014 	{
24015 	  rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
24016 	  src = change_address (srcmem, SImode, srcptr);
24017 	  dest = change_address (destmem, SImode, destptr);
24018 	  emit_move_insn (dest, src);
24019 	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
24020 				     true, OPTAB_LIB_WIDEN);
24021 	  if (tmp != offset)
24022 	    emit_move_insn (offset, tmp);
24023 	  emit_label (label);
24024 	  LABEL_NUSES (label) = 1;
24025 	}
24026       if (max_size > 2)
24027 	{
24028 	  rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
24029 	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
24030 	  src = change_address (srcmem, HImode, tmp);
24031 	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
24032 	  dest = change_address (destmem, HImode, tmp);
24033 	  emit_move_insn (dest, src);
24034 	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
24035 				     true, OPTAB_LIB_WIDEN);
24036 	  if (tmp != offset)
24037 	    emit_move_insn (offset, tmp);
24038 	  emit_label (label);
24039 	  LABEL_NUSES (label) = 1;
24040 	}
24041       if (max_size > 1)
24042 	{
24043 	  rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
24044 	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
24045 	  src = change_address (srcmem, QImode, tmp);
24046 	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
24047 	  dest = change_address (destmem, QImode, tmp);
24048 	  emit_move_insn (dest, src);
24049 	  emit_label (label);
24050 	  LABEL_NUSES (label) = 1;
24051 	}
24052     }
24053 }
24054 
24055 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
24056    with value PROMOTED_VAL.
24057    The return value is the updated DESTMEM.  */
24059 static rtx
24060 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
24061 	     HOST_WIDE_INT size_to_move)
24062 {
24063   rtx dst = destmem, adjust;
24064   enum insn_code code;
24065   machine_mode move_mode;
24066   int piece_size, i;
24067 
24068   /* Choose the widest mode in which to perform the stores.  Use the
24069      mode of PROMOTED_VAL, narrowing it to a smaller integer mode when
24070      it is wider than SIZE_TO_MOVE.  */
24071   move_mode = GET_MODE (promoted_val);
24072   if (move_mode == VOIDmode)
24073     move_mode = QImode;
24074   if (size_to_move < GET_MODE_SIZE (move_mode))
24075     {
24076       move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
24077       promoted_val = gen_lowpart (move_mode, promoted_val);
24078     }
24079   piece_size = GET_MODE_SIZE (move_mode);
24080   code = optab_handler (mov_optab, move_mode);
24081   gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
24082 
24083   dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
24084 
24085   /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZE moves.  */
24086   gcc_assert (size_to_move % piece_size == 0);
24087   adjust = GEN_INT (piece_size);
24088   for (i = 0; i < size_to_move; i += piece_size)
24089     {
24090       if (piece_size <= GET_MODE_SIZE (word_mode))
24091 	{
24092 	  emit_insn (gen_strset (destptr, dst, promoted_val));
24093 	  dst = adjust_automodify_address_nv (dst, move_mode, destptr,
24094 					      piece_size);
24095 	  continue;
24096 	}
24097 
24098       emit_insn (GEN_FCN (code) (dst, promoted_val));
24099 
24100       emit_move_insn (destptr,
24101 		      gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
24102 
24103       dst = adjust_automodify_address_nv (dst, move_mode, destptr,
24104 					  piece_size);
24105     }
24106 
24107   /* Update DST rtx.  */
24108   return dst;
24109 }
24110 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
24111 static void
24112 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
24113 				 rtx count, int max_size)
24114 {
24115   count =
24116     expand_simple_binop (counter_mode (count), AND, count,
24117 			 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
24118   expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
24119 				 gen_lowpart (QImode, value), count, QImode,
24120 				 1, max_size / 2, true);
24121 }
24122 
24123 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
24124 static void
24125 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
24126 			rtx count, int max_size)
24127 {
24128   rtx dest;
24129 
24130   if (CONST_INT_P (count))
24131     {
24132       HOST_WIDE_INT countval = INTVAL (count);
24133       HOST_WIDE_INT epilogue_size = countval % max_size;
24134       int i;
24135 
24136       /* For now MAX_SIZE should be a power of 2.  This assert could be
24137 	 relaxed, but it would require slightly more complicated epilogue
24138 	 expansion.  */
24139       gcc_assert ((max_size & (max_size - 1)) == 0);
24140       for (i = max_size; i >= 1; i >>= 1)
24141 	{
24142 	  if (epilogue_size & i)
24143 	    {
24144 	      if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24145 		destmem = emit_memset (destmem, destptr, vec_value, i);
24146 	      else
24147 		destmem = emit_memset (destmem, destptr, value, i);
24148 	    }
24149 	}
24150       return;
24151     }
24152   if (max_size > 32)
24153     {
24154       expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
24155       return;
24156     }
24157   if (max_size > 16)
24158     {
24159       rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
24160       if (TARGET_64BIT)
24161 	{
24162 	  dest = change_address (destmem, DImode, destptr);
24163 	  emit_insn (gen_strset (destptr, dest, value));
24164 	  dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
24165 	  emit_insn (gen_strset (destptr, dest, value));
24166 	}
24167       else
24168 	{
24169 	  dest = change_address (destmem, SImode, destptr);
24170 	  emit_insn (gen_strset (destptr, dest, value));
24171 	  dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24172 	  emit_insn (gen_strset (destptr, dest, value));
24173 	  dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
24174 	  emit_insn (gen_strset (destptr, dest, value));
24175 	  dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
24176 	  emit_insn (gen_strset (destptr, dest, value));
24177 	}
24178       emit_label (label);
24179       LABEL_NUSES (label) = 1;
24180     }
24181   if (max_size > 8)
24182     {
24183       rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
24184       if (TARGET_64BIT)
24185 	{
24186 	  dest = change_address (destmem, DImode, destptr);
24187 	  emit_insn (gen_strset (destptr, dest, value));
24188 	}
24189       else
24190 	{
24191 	  dest = change_address (destmem, SImode, destptr);
24192 	  emit_insn (gen_strset (destptr, dest, value));
24193 	  dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24194 	  emit_insn (gen_strset (destptr, dest, value));
24195 	}
24196       emit_label (label);
24197       LABEL_NUSES (label) = 1;
24198     }
24199   if (max_size > 4)
24200     {
24201       rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
24202       dest = change_address (destmem, SImode, destptr);
24203       emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
24204       emit_label (label);
24205       LABEL_NUSES (label) = 1;
24206     }
24207   if (max_size > 2)
24208     {
24209       rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
24210       dest = change_address (destmem, HImode, destptr);
24211       emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
24212       emit_label (label);
24213       LABEL_NUSES (label) = 1;
24214     }
24215   if (max_size > 1)
24216     {
24217       rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
24218       dest = change_address (destmem, QImode, destptr);
24219       emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
24220       emit_label (label);
24221       LABEL_NUSES (label) = 1;
24222     }
24223 }
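
/* Illustration of the constant-count path above (a sketch assuming
   MAX_SIZE == 16): a tail of COUNT % 16 == 11 bytes, binary 1011, is
   stored as one 8-byte, one 2-byte and one 1-byte chunk:

     size_t tail = count % 16;
     for (int i = 16; i >= 1; i >>= 1)
       if (tail & i)
         {
           memset (dst, value, i);
           dst += i;
         }
*/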
24224 
24225 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
24226    DESTMEM to align it to DESIRED_ALIGNMENT.  Original alignment is ALIGN.
24227    Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
24228    ignored.
24229    Return value is updated DESTMEM.  */
24230 static rtx
24231 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
24232 				  rtx destptr, rtx srcptr, rtx value,
24233 				  rtx vec_value, rtx count, int align,
24234 				  int desired_alignment, bool issetmem)
24235 {
24236   int i;
24237   for (i = 1; i < desired_alignment; i <<= 1)
24238     {
24239       if (align <= i)
24240 	{
24241 	  rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
24242 	  if (issetmem)
24243 	    {
24244 	      if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24245 		destmem = emit_memset (destmem, destptr, vec_value, i);
24246 	      else
24247 		destmem = emit_memset (destmem, destptr, value, i);
24248 	    }
24249 	  else
24250 	    destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
24251 	  ix86_adjust_counter (count, i);
24252 	  emit_label (label);
24253 	  LABEL_NUSES (label) = 1;
24254 	  set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
24255 	}
24256     }
24257   return destmem;
24258 }
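
/* A plain C sketch of the jump tree emitted above (hypothetical helper,
   memset case; DESIRED_ALIGN must be a power of two; the real code also
   skips bits already guaranteed by the known ALIGN):

     static char *
     align_dest (char *dst, size_t *count, int value, int desired_align)
     {
       for (int i = 1; i < desired_align; i <<= 1)
         if ((uintptr_t) dst & i)
           {
             memset (dst, value, i);   /* store i bytes */
             dst += i;
             *count -= i;
           }
       return dst;
     }

   Each conditional store clears one low address bit, so DST ends up
   DESIRED_ALIGN-aligned.  */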
24259 
24260 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
24261    or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
24262    and jump to DONE_LABEL.  */
24263 static void
24264 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
24265 			       rtx destptr, rtx srcptr,
24266 			       rtx value, rtx vec_value,
24267 			       rtx count, int size,
24268 			       rtx done_label, bool issetmem)
24269 {
24270   rtx_code_label *label = ix86_expand_aligntest (count, size, false);
24271   machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
24272   rtx modesize;
24273   int n;
24274 
24275   /* If we do not have a vector value to copy, we must reduce the size.  */
24276   if (issetmem)
24277     {
24278       if (!vec_value)
24279 	{
24280 	  if (GET_MODE (value) == VOIDmode && size > 8)
24281 	    mode = Pmode;
24282 	  else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
24283 	    mode = GET_MODE (value);
24284 	}
24285       else
24286 	mode = GET_MODE (vec_value), value = vec_value;
24287     }
24288   else
24289     {
24290       /* Choose an appropriate vector mode.  */
24291       if (size >= 32)
24292 	mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
24293       else if (size >= 16)
24294 	mode = TARGET_SSE ? V16QImode : DImode;
24295       srcmem = change_address (srcmem, mode, srcptr);
24296     }
24297   destmem = change_address (destmem, mode, destptr);
24298   modesize = GEN_INT (GET_MODE_SIZE (mode));
24299   gcc_assert (GET_MODE_SIZE (mode) <= size);
24300   for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24301     {
24302       if (issetmem)
24303 	emit_move_insn (destmem, gen_lowpart (mode, value));
24304       else
24305 	{
24306           emit_move_insn (destmem, srcmem);
24307           srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24308 	}
24309       destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24310     }
24311 
24312   destmem = offset_address (destmem, count, 1);
24313   destmem = offset_address (destmem, GEN_INT (-2 * size),
24314 			    GET_MODE_SIZE (mode));
24315   if (!issetmem)
24316     {
24317       srcmem = offset_address (srcmem, count, 1);
24318       srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24319 			       GET_MODE_SIZE (mode));
24320     }
24321   for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24322     {
24323       if (issetmem)
24324 	emit_move_insn (destmem, gen_lowpart (mode, value));
24325       else
24326 	{
24327 	  emit_move_insn (destmem, srcmem);
24328 	  srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24329 	}
24330       destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24331     }
24332   emit_jump_insn (gen_jump (done_label));
24333   emit_barrier ();
24334 
24335   emit_label (label);
24336   LABEL_NUSES (label) = 1;
24337 }
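
/* For example (a sketch, not compiler code): with SIZE == 4 the sequence
   above handles any COUNT in 4..7 with two possibly overlapping accesses:

     void
     set_4_to_7 (char *dst, uint32_t v, size_t n)   /* 4 <= n <= 7 */
     {
       memcpy (dst, &v, 4);           /* bytes [0, 4)   */
       memcpy (dst + n - 4, &v, 4);   /* bytes [n-4, n) */
     }

   The two stores overlap for n < 8, which is harmless for memset and for
   a copy between non-overlapping blocks.  */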
24338 
24339 /* Handle small memcpy (up to SIZE, which is supposed to be a small power of 2)
24340    and get ready for the main memcpy loop by copying the initial
24341    DESIRED_ALIGN-ALIGN bytes and the last SIZE bytes, adjusting
24342    DESTPTR/SRCPTR/COUNT so that we can proceed with a loop copying SIZE bytes
24343    at once.  Do moves in MODE.  DONE_LABEL is a label after the whole copying
24344    sequence.  The label is created on demand if *DONE_LABEL is NULL.
24345    MIN_SIZE is the minimal size of the copied block.  This value gets adjusted
24346    for new bounds after the initial copies.
24347 
24348    DESTMEM/SRCMEM are memory expressions pointing to the copied block,
24349    DESTPTR/SRCPTR are pointers to the block.  DYNAMIC_CHECK indicates whether
24350    we will dispatch to a library call for large blocks.
24351 
24352    In pseudocode we do:
24353 
24354    if (COUNT < SIZE)
24355      {
24356        Assume that SIZE is 4. Bigger sizes are handled analogously
24357        if (COUNT & 4)
24358 	 {
24359 	    copy 4 bytes from SRCPTR to DESTPTR
24360 	    copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24361 	    goto done_label
24362 	 }
24363        if (!COUNT)
24364 	 goto done_label;
24365        copy 1 byte from SRCPTR to DESTPTR
24366        if (COUNT & 2)
24367 	 {
24368 	    copy 2 bytes from SRCPTR to DESTPTR
24369 	    copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24370 	 }
24371      }
24372    else
24373      {
24374        copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24375        copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT - SIZE
24376 
24377        OLD_DESTPTR = DESTPTR;
24378        Align DESTPTR up to DESIRED_ALIGN
24379        SRCPTR += DESTPTR - OLD_DESTPTR
24380        COUNT -= DESTPTR - OLD_DESTPTR
24381        if (DYNAMIC_CHECK)
24382 	 Round COUNT down to multiple of SIZE
24383        << optional caller supplied zero size guard is here >>
24384        << optional caller supplied dynamic check is here >>
24385        << caller supplied main copy loop is here >>
24386      }
24387    done_label:
24388   */
24389 static void
24390 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24391 							    rtx *destptr, rtx *srcptr,
24392 							    machine_mode mode,
24393 							    rtx value, rtx vec_value,
24394 							    rtx *count,
24395 							    rtx_code_label **done_label,
24396 							    int size,
24397 							    int desired_align,
24398 							    int align,
24399 							    unsigned HOST_WIDE_INT *min_size,
24400 							    bool dynamic_check,
24401 							    bool issetmem)
24402 {
24403   rtx_code_label *loop_label = NULL, *label;
24404   int n;
24405   rtx modesize;
24406   int prolog_size = 0;
24407   rtx mode_value;
24408 
24409   /* Choose the proper value to copy.  */
24410   if (issetmem && VECTOR_MODE_P (mode))
24411     mode_value = vec_value;
24412   else
24413     mode_value = value;
24414   gcc_assert (GET_MODE_SIZE (mode) <= size);
24415 
24416   /* See if the block is big or small; handle the small blocks here.  */
24417   if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24418     {
24419       int size2 = size;
24420       loop_label = gen_label_rtx ();
24421 
24422       if (!*done_label)
24423 	*done_label = gen_label_rtx ();
24424 
24425       emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24426 			       1, loop_label);
24427       size2 >>= 1;
24428 
24429       /* Handle sizes > 3.  */
24430       for (;size2 > 2; size2 >>= 1)
24431 	expand_small_movmem_or_setmem (destmem, srcmem,
24432 				       *destptr, *srcptr,
24433 				       value, vec_value,
24434 				       *count,
24435 				       size2, *done_label, issetmem);
24436       /* Nothing to copy?  Jump to DONE_LABEL if so.  */
24437       emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24438 			       1, *done_label);
24439 
24440       /* Do a byte copy.  */
24441       destmem = change_address (destmem, QImode, *destptr);
24442       if (issetmem)
24443 	emit_move_insn (destmem, gen_lowpart (QImode, value));
24444       else
24445 	{
24446           srcmem = change_address (srcmem, QImode, *srcptr);
24447           emit_move_insn (destmem, srcmem);
24448 	}
24449 
24450       /* Handle sizes 2 and 3.  */
24451       label = ix86_expand_aligntest (*count, 2, false);
24452       destmem = change_address (destmem, HImode, *destptr);
24453       destmem = offset_address (destmem, *count, 1);
24454       destmem = offset_address (destmem, GEN_INT (-2), 2);
24455       if (issetmem)
24456         emit_move_insn (destmem, gen_lowpart (HImode, value));
24457       else
24458 	{
24459 	  srcmem = change_address (srcmem, HImode, *srcptr);
24460 	  srcmem = offset_address (srcmem, *count, 1);
24461 	  srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24462 	  emit_move_insn (destmem, srcmem);
24463 	}
24464 
24465       emit_label (label);
24466       LABEL_NUSES (label) = 1;
24467       emit_jump_insn (gen_jump (*done_label));
24468       emit_barrier ();
24469     }
24470   else
24471     gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24472 		|| UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24473 
24474   /* Start memcpy for COUNT >= SIZE.  */
24475   if (loop_label)
24476     {
24477        emit_label (loop_label);
24478        LABEL_NUSES (loop_label) = 1;
24479     }
24480 
24481   /* Copy at least the first DESIRED_ALIGN - ALIGN bytes.  */
24482   if (!issetmem)
24483     srcmem = change_address (srcmem, mode, *srcptr);
24484   destmem = change_address (destmem, mode, *destptr);
24485   modesize = GEN_INT (GET_MODE_SIZE (mode));
24486   for (n = 0; prolog_size < desired_align - align; n++)
24487     {
24488       if (issetmem)
24489         emit_move_insn (destmem, mode_value);
24490       else
24491 	{
24492           emit_move_insn (destmem, srcmem);
24493           srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24494 	}
24495       destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24496       prolog_size += GET_MODE_SIZE (mode);
24497     }
24498 
24499 
24500   /* Copy last SIZE bytes.  */
24501   destmem = offset_address (destmem, *count, 1);
24502   destmem = offset_address (destmem,
24503 			    GEN_INT (-size - prolog_size),
24504 			    1);
24505   if (issetmem)
24506     emit_move_insn (destmem, mode_value);
24507   else
24508     {
24509       srcmem = offset_address (srcmem, *count, 1);
24510       srcmem = offset_address (srcmem,
24511 			       GEN_INT (-size - prolog_size),
24512 			       1);
24513       emit_move_insn (destmem, srcmem);
24514     }
24515   for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24516     {
24517       destmem = offset_address (destmem, modesize, 1);
24518       if (issetmem)
24519 	emit_move_insn (destmem, mode_value);
24520       else
24521 	{
24522           srcmem = offset_address (srcmem, modesize, 1);
24523           emit_move_insn (destmem, srcmem);
24524 	}
24525     }
24526 
24527   /* Align destination.  */
24528   if (desired_align > 1 && desired_align > align)
24529     {
24530       rtx saveddest = *destptr;
24531 
24532       gcc_assert (desired_align <= size);
24533       /* Align DESTPTR up and place it in a new register.  */
24534       *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24535 				      GEN_INT (prolog_size),
24536 				      NULL_RTX, 1, OPTAB_DIRECT);
24537       if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
24538 	REG_POINTER (*destptr) = 1;
24539       *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24540 				      GEN_INT (-desired_align),
24541 				      *destptr, 1, OPTAB_DIRECT);
24542       /* See how many bytes we skipped.  */
24543       saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24544 				       *destptr,
24545 				       saveddest, 1, OPTAB_DIRECT);
24546       /* Adjust srcptr and count.  */
24547       if (!issetmem)
24548 	*srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
24549 				       saveddest, *srcptr, 1, OPTAB_DIRECT);
24550       *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24551 				    saveddest, *count, 1, OPTAB_DIRECT);
24552       /* We copied at most SIZE + PROLOG_SIZE bytes.  */
24553       if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24554 	*min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24555       else
24556 	*min_size = 0;
24557 
24558       /* Our loops always round down the block size, but for the dispatch
24559          to the library call we need the precise value.  */
24560       if (dynamic_check)
24561 	*count = expand_simple_binop (GET_MODE (*count), AND, *count,
24562 				      GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24563     }
24564   else
24565     {
24566       gcc_assert (prolog_size == 0);
24567       /* Decrease the count, so we won't end up copying the last word twice.  */
24568       if (!CONST_INT_P (*count))
24569 	*count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24570 				      constm1_rtx, *count, 1, OPTAB_DIRECT);
24571       else
24572 	*count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24573       if (*min_size)
24574 	*min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24575     }
24576 }
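
/* The destination-alignment arithmetic above corresponds to this C sketch
   (names hypothetical; DESIRED_ALIGN is a power of two; the pointers are
   treated as integers):

     uintptr_t old_dst = dst;
     dst = (old_dst + prolog_size) & ~(uintptr_t) (desired_align - 1);
     src += dst - old_dst;     /* keep SRC in step with DST */
     count -= dst - old_dst;   /* those bytes were already copied */

   Note that GEN_INT (-desired_align) is exactly the ~(desired_align - 1)
   mask used here.  */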
24577 
24578 
24579 /* This function is like the previous one, except here we know how many bytes
24580    need to be copied.  That allows us to update the alignment not only of DST,
24581    which is returned, but also of SRC, which is passed as a pointer for that
24582    reason.  */
24583 static rtx
24584 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24585 					   rtx srcreg, rtx value, rtx vec_value,
24586 					   int desired_align, int align_bytes,
24587 					   bool issetmem)
24588 {
24589   rtx src = NULL;
24590   rtx orig_dst = dst;
24591   rtx orig_src = NULL;
24592   int piece_size = 1;
24593   int copied_bytes = 0;
24594 
24595   if (!issetmem)
24596     {
24597       gcc_assert (srcp != NULL);
24598       src = *srcp;
24599       orig_src = src;
24600     }
24601 
24602   for (piece_size = 1;
24603        piece_size <= desired_align && copied_bytes < align_bytes;
24604        piece_size <<= 1)
24605     {
24606       if (align_bytes & piece_size)
24607 	{
24608 	  if (issetmem)
24609 	    {
24610 	      if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24611 		dst = emit_memset (dst, destreg, vec_value, piece_size);
24612 	      else
24613 		dst = emit_memset (dst, destreg, value, piece_size);
24614 	    }
24615 	  else
24616 	    dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24617 	  copied_bytes += piece_size;
24618 	}
24619     }
24620   if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24621     set_mem_align (dst, desired_align * BITS_PER_UNIT);
24622   if (MEM_SIZE_KNOWN_P (orig_dst))
24623     set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24624 
24625   if (!issetmem)
24626     {
24627       int src_align_bytes = get_mem_align_offset (src, desired_align
24628 						       * BITS_PER_UNIT);
24629       if (src_align_bytes >= 0)
24630 	src_align_bytes = desired_align - src_align_bytes;
24631       if (src_align_bytes >= 0)
24632 	{
24633 	  unsigned int src_align;
24634 	  for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24635 	    {
24636 	      if ((src_align_bytes & (src_align - 1))
24637 		   == (align_bytes & (src_align - 1)))
24638 		break;
24639 	    }
24640 	  if (src_align > (unsigned int) desired_align)
24641 	    src_align = desired_align;
24642 	  if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24643 	    set_mem_align (src, src_align * BITS_PER_UNIT);
24644 	}
24645       if (MEM_SIZE_KNOWN_P (orig_src))
24646 	set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24647       *srcp = src;
24648     }
24649 
24650   return dst;
24651 }
24652 
24653 /* Return true if ALG can be used in the current context.
24654    Assume we expand memset if MEMSET is true.  */
24655 static bool
24656 alg_usable_p (enum stringop_alg alg, bool memset)
24657 {
24658   if (alg == no_stringop)
24659     return false;
24660   if (alg == vector_loop)
24661     return TARGET_SSE || TARGET_AVX;
24662   /* Algorithms using the rep prefix want at least edi and ecx;
24663      additionally, memset wants eax and memcpy wants esi.  Don't
24664      consider such algorithms if the user has appropriated those
24665      registers for their own purposes.	*/
24666   if (alg == rep_prefix_1_byte
24667       || alg == rep_prefix_4_byte
24668       || alg == rep_prefix_8_byte)
24669     return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24670              || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24671   return true;
24672 }
24673 
24674 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
24675 static enum stringop_alg
24676 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24677 	    unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24678 	    bool memset, bool zero_memset, int *dynamic_check, bool *noalign,
24679 	    bool recur)
24680 {
24681   const struct stringop_algs *algs;
24682   bool optimize_for_speed;
24683   int max = 0;
24684   const struct processor_costs *cost;
24685   int i;
24686   bool any_alg_usable_p = false;
24687 
24688   *noalign = false;
24689   *dynamic_check = -1;
24690 
24691   /* Even if the string operation call is cold, we still might spend a lot
24692      of time processing large blocks.  */
24693   if (optimize_function_for_size_p (cfun)
24694       || (optimize_insn_for_size_p ()
24695  	  && (max_size < 256
24696               || (expected_size != -1 && expected_size < 256))))
24697     optimize_for_speed = false;
24698   else
24699     optimize_for_speed = true;
24700 
24701   cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24702   if (memset)
24703     algs = &cost->memset[TARGET_64BIT != 0];
24704   else
24705     algs = &cost->memcpy[TARGET_64BIT != 0];
24706 
24707   /* Find the maximal size covered by a usable non-libcall algorithm.  */
24708   for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24709     {
24710       enum stringop_alg candidate = algs->size[i].alg;
24711       bool usable = alg_usable_p (candidate, memset);
24712       any_alg_usable_p |= usable;
24713 
24714       if (candidate != libcall && candidate && usable)
24715 	max = algs->size[i].max;
24716     }
24717 
24718   /* If the expected size is not known but the max size is small enough
24719      that the inline version is a win, set the expected size into
24720      the range.  */
24721   if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24722       && expected_size == -1)
24723     expected_size = min_size / 2 + max_size / 2;
24724 
24725   /* If the user specified the algorithm, honor it if possible.  */
24726   if (ix86_stringop_alg != no_stringop
24727       && alg_usable_p (ix86_stringop_alg, memset))
24728     return ix86_stringop_alg;
24729   /* rep; movq or rep; movl is the smallest variant.  */
24730   else if (!optimize_for_speed)
24731     {
24732       *noalign = true;
24733       if (!count || (count & 3) || (memset && !zero_memset))
24734 	return alg_usable_p (rep_prefix_1_byte, memset)
24735 	       ? rep_prefix_1_byte : loop_1_byte;
24736       else
24737 	return alg_usable_p (rep_prefix_4_byte, memset)
24738 	       ? rep_prefix_4_byte : loop;
24739     }
24740   /* Very tiny blocks are best handled via the loop; REP is expensive to
24741      set up.  */
24742   else if (expected_size != -1 && expected_size < 4)
24743     return loop_1_byte;
24744   else if (expected_size != -1)
24745     {
24746       enum stringop_alg alg = libcall;
24747       bool alg_noalign = false;
24748       for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24749 	{
24750 	  /* We get here if the algorithms that were not libcall-based
24751 	     were rep-prefix based and we are unable to use rep prefixes
24752 	     based on global register usage.  Break out of the loop and
24753 	     use the heuristic below.  */
24754 	  if (algs->size[i].max == 0)
24755 	    break;
24756 	  if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24757 	    {
24758 	      enum stringop_alg candidate = algs->size[i].alg;
24759 
24760 	      if (candidate != libcall && alg_usable_p (candidate, memset))
24761 		{
24762 		  alg = candidate;
24763 		  alg_noalign = algs->size[i].noalign;
24764 		}
24765 	      /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24766 		 last non-libcall inline algorithm.  */
24767 	      if (TARGET_INLINE_ALL_STRINGOPS)
24768 		{
24769 		  /* When the current size is best copied by a libcall, but we
24770 		     are still forced to inline, run the heuristic below that
24771 		     will pick code for medium-sized blocks.  */
24772 		  if (alg != libcall)
24773 		    {
24774 		      *noalign = alg_noalign;
24775 		      return alg;
24776 		    }
24777 		  else if (!any_alg_usable_p)
24778 		    break;
24779 		}
24780 	      else if (alg_usable_p (candidate, memset))
24781 		{
24782 		  *noalign = algs->size[i].noalign;
24783 		  return candidate;
24784 		}
24785 	    }
24786 	}
24787     }
24788   /* When asked to inline the call anyway, try to pick a meaningful choice.
24789      We look for the maximal size of block that is faster to copy by hand
24790      and take blocks of at most that size, guessing that the average size
24791      will be roughly half of that.
24792 
24793      If this turns out to be bad, we might simply specify the preferred
24794      choice in ix86_costs.  */
24795   if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24796       && (algs->unknown_size == libcall
24797 	  || !alg_usable_p (algs->unknown_size, memset)))
24798     {
24799       enum stringop_alg alg;
24800       HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;
24801 
24802       /* If there aren't any usable algorithms or if recursing already,
24803 	 then recursing on smaller sizes or the same size isn't going to
24804 	 find anything.  Just return the simple byte-at-a-time copy loop.  */
24805       if (!any_alg_usable_p || recur)
24806 	{
24807 	  /* Pick something reasonable.  */
24808 	  if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur)
24809 	    *dynamic_check = 128;
24810 	  return loop_1_byte;
24811 	}
24812       alg = decide_alg (count, new_expected_size, min_size, max_size, memset,
24813 			zero_memset, dynamic_check, noalign, true);
24814       gcc_assert (*dynamic_check == -1);
24815       if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24816 	*dynamic_check = max;
24817       else
24818 	gcc_assert (alg != libcall);
24819       return alg;
24820     }
24821   return (alg_usable_p (algs->unknown_size, memset)
24822 	  ? algs->unknown_size : libcall);
24823 }
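
/* A hypothetical cost-table fragment illustrating the scan above (the
   real entries live in the processor cost tables):

     {libcall,				unknown_size
      {{24, loop, false},		up to 24 bytes: simple loop
       {128, rep_prefix_4_byte, false},	up to 128 bytes: rep movl
       {-1, libcall, false}}}		anything larger: libcall

   With EXPECTED_SIZE == 100, the first usable non-libcall entry whose MAX
   covers the size wins; here that is rep_prefix_4_byte.  */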
24824 
24825 /* Decide on alignment.  We know that the operand is already aligned to ALIGN
24826    (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
24827 static int
24828 decide_alignment (int align,
24829 		  enum stringop_alg alg,
24830 		  int expected_size,
24831 		  machine_mode move_mode)
24832 {
24833   int desired_align = 0;
24834 
24835   gcc_assert (alg != no_stringop);
24836 
24837   if (alg == libcall)
24838     return 0;
24839   if (move_mode == VOIDmode)
24840     return 0;
24841 
24842   desired_align = GET_MODE_SIZE (move_mode);
24843   /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
24844      copying a whole cache line at once.  */
24845   if (TARGET_PENTIUMPRO
24846       && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24847     desired_align = 8;
24848 
24849   if (optimize_size)
24850     desired_align = 1;
24851   if (desired_align < align)
24852     desired_align = align;
24853   if (expected_size != -1 && expected_size < 4)
24854     desired_align = align;
24855 
24856   return desired_align;
24857 }
24858 
24859 
24860 /* Helper function for memset.  For the QImode value 0xXY produce
24861    0xXYXYXYXY of the width specified by MODE.  This is essentially
24862    a multiplication by 0x01010101, but we can do slightly better than
24863    synth_mult by unwinding the sequence by hand on CPUs with
24864    slow multiply.  */
24865 static rtx
24866 promote_duplicated_reg (machine_mode mode, rtx val)
24867 {
24868   machine_mode valmode = GET_MODE (val);
24869   rtx tmp;
24870   int nops = mode == DImode ? 3 : 2;
24871 
24872   gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24873   if (val == const0_rtx)
24874     return copy_to_mode_reg (mode, CONST0_RTX (mode));
24875   if (CONST_INT_P (val))
24876     {
24877       HOST_WIDE_INT v = INTVAL (val) & 255;
24878 
24879       v |= v << 8;
24880       v |= v << 16;
24881       if (mode == DImode)
24882         v |= (v << 16) << 16;
24883       return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24884     }
24885 
24886   if (valmode == VOIDmode)
24887     valmode = QImode;
24888   if (valmode != QImode)
24889     val = gen_lowpart (QImode, val);
24890   if (mode == QImode)
24891     return val;
24892   if (!TARGET_PARTIAL_REG_STALL)
24893     nops--;
24894   if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24895       + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24896       <= (ix86_cost->shift_const + ix86_cost->add) * nops
24897           + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24898     {
24899       rtx reg = convert_modes (mode, QImode, val, true);
24900       tmp = promote_duplicated_reg (mode, const1_rtx);
24901       return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24902 				  OPTAB_DIRECT);
24903     }
24904   else
24905     {
24906       rtx reg = convert_modes (mode, QImode, val, true);
24907 
24908       if (!TARGET_PARTIAL_REG_STALL)
24909 	if (mode == SImode)
24910 	  emit_insn (gen_movsi_insv_1 (reg, reg));
24911 	else
24912 	  emit_insn (gen_movdi_insv_1 (reg, reg));
24913       else
24914 	{
24915 	  tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24916 				     NULL, 1, OPTAB_DIRECT);
24917 	  reg =
24918 	    expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24919 	}
24920       tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24921 			         NULL, 1, OPTAB_DIRECT);
24922       reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24923       if (mode == SImode)
24924 	return reg;
24925       tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24926 				 NULL, 1, OPTAB_DIRECT);
24927       reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24928       return reg;
24929     }
24930 }
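
/* A worked example in plain C of the shift-and-or variant above:

     uint32_t
     broadcast_byte (uint8_t b)   /* 0xXY -> 0xXYXYXYXY */
     {
       uint32_t v = b;
       v |= v << 8;               /* 0x000000XY -> 0x0000XYXY */
       v |= v << 16;              /* 0x0000XYXY -> 0xXYXYXYXY */
       return v;                  /* equals b * 0x01010101 */
     }

   For DImode one more "v |= v << 32" step is emitted.  */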
24931 
24932 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
24933    will be needed by the main loop copying SIZE_NEEDED chunks and by the
24934    prologue raising alignment from ALIGN to DESIRED_ALIGN.  */
24935 static rtx
24936 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24937 				int align)
24938 {
24939   rtx promoted_val;
24940 
24941   if (TARGET_64BIT
24942       && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24943     promoted_val = promote_duplicated_reg (DImode, val);
24944   else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24945     promoted_val = promote_duplicated_reg (SImode, val);
24946   else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24947     promoted_val = promote_duplicated_reg (HImode, val);
24948   else
24949     promoted_val = val;
24950 
24951   return promoted_val;
24952 }
24953 
24954 /* Expand a string move (memcpy) or store (memset) operation.  Use i386 string
24955    operations when profitable.  The code depends upon architecture, block size
24956    and alignment, but always has one of the following overall structures:
24957 
24958    Aligned move sequence:
24959 
24960      1) Prologue guard: Conditional that jumps up to epilogues for small
24961 	blocks that can be handled by the epilogue alone.  This is faster,
24962 	but also needed for correctness, since the prologue assumes the block
24963 	is larger than the desired alignment.
24964 
24965 	Optional dynamic check for size and libcall for large
24966 	blocks is emitted here too, with -minline-stringops-dynamically.
24967 
24968      2) Prologue: copy the first few bytes in order to get the destination
24969 	aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
24970 	than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24971 	copied.  We emit either a jump tree for power-of-two sized
24972 	blocks, or a byte loop.
24973 
24974      3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24975 	with the specified algorithm.
24976 
24977      4) Epilogue: code copying the tail of the block that is too small to be
24978 	handled by the main body (or up to the size guarded by the prologue guard).
24979 
24980   Misaligned move sequence
24981 
24982      1) Misaligned move prologue/epilogue containing:
24983         a) Prologue handling small memory blocks and jumping to done_label
24984 	   (skipped if blocks are known to be large enough)
24985 	b) A single possibly misaligned move copying the first
24986 	   DESIRED_ALIGN-ALIGN bytes if alignment is needed
24987 	   (skipped if alignment is not needed)
24988         c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24989 
24990      2) Zero size guard dispatching to done_label, if needed
24991 
24992      3) Dispatch to a library call, if needed,
24993 
24994      4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24995 	with the specified algorithm.  */
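
/* As rough pseudocode (an illustration only), the aligned sequence for
   memset looks like:

     if (COUNT < EPILOGUE_SIZE_NEEDED)
       goto epilogue;				1) prologue guard
     while (DST & (DESIRED_ALIGN - 1))
       store a byte, DST++, COUNT--		2) alignment prologue
     while (COUNT >= SIZE_NEEDED)
       store SIZE_NEEDED bytes, advance		3) main body
   epilogue:
     store the remaining COUNT bytes		4) epilogue
*/
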
24996 bool
24997 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24998 			   rtx align_exp, rtx expected_align_exp,
24999 			   rtx expected_size_exp, rtx min_size_exp,
25000 			   rtx max_size_exp, rtx probable_max_size_exp,
25001 			   bool issetmem)
25002 {
25003   rtx destreg;
25004   rtx srcreg = NULL;
25005   rtx_code_label *label = NULL;
25006   rtx tmp;
25007   rtx_code_label *jump_around_label = NULL;
25008   HOST_WIDE_INT align = 1;
25009   unsigned HOST_WIDE_INT count = 0;
25010   HOST_WIDE_INT expected_size = -1;
25011   int size_needed = 0, epilogue_size_needed;
25012   int desired_align = 0, align_bytes = 0;
25013   enum stringop_alg alg;
25014   rtx promoted_val = NULL;
25015   rtx vec_promoted_val = NULL;
25016   bool force_loopy_epilogue = false;
25017   int dynamic_check;
25018   bool need_zero_guard = false;
25019   bool noalign;
25020   machine_mode move_mode = VOIDmode;
25021   int unroll_factor = 1;
25022   /* TODO: Once value ranges are available, fill in proper data.  */
25023   unsigned HOST_WIDE_INT min_size = 0;
25024   unsigned HOST_WIDE_INT max_size = -1;
25025   unsigned HOST_WIDE_INT probable_max_size = -1;
25026   bool misaligned_prologue_used = false;
25027 
25028   if (CONST_INT_P (align_exp))
25029     align = INTVAL (align_exp);
25030   /* i386 can do misaligned access at a reasonably increased cost.  */
25031   if (CONST_INT_P (expected_align_exp)
25032       && INTVAL (expected_align_exp) > align)
25033     align = INTVAL (expected_align_exp);
25034   /* ALIGN is the minimum of destination and source alignment, but we care here
25035      just about destination alignment.  */
25036   else if (!issetmem
25037 	   && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
25038     align = MEM_ALIGN (dst) / BITS_PER_UNIT;
25039 
25040   if (CONST_INT_P (count_exp))
25041     {
25042       min_size = max_size = probable_max_size = count = expected_size
25043 	= INTVAL (count_exp);
25044       /* When COUNT is 0, there is nothing to do.  */
25045       if (!count)
25046 	return true;
25047     }
25048   else
25049     {
25050       if (min_size_exp)
25051 	min_size = INTVAL (min_size_exp);
25052       if (max_size_exp)
25053 	max_size = INTVAL (max_size_exp);
25054       if (probable_max_size_exp)
25055 	probable_max_size = INTVAL (probable_max_size_exp);
25056       if (CONST_INT_P (expected_size_exp))
25057 	expected_size = INTVAL (expected_size_exp);
25058      }
25059 
25060   /* Make sure we don't need to care about overflow later on.  */
25061   if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
25062     return false;
25063 
25064   /* Step 0: Decide on preferred algorithm, desired alignment and
25065      size of chunks to be copied by main loop.  */
25066   alg = decide_alg (count, expected_size, min_size, probable_max_size,
25067 		    issetmem,
25068 		    issetmem && val_exp == const0_rtx,
25069 		    &dynamic_check, &noalign, false);
25070   if (alg == libcall)
25071     return false;
25072   gcc_assert (alg != no_stringop);
25073 
25074   /* For now the vector version of memset is generated only for memory
25075      zeroing, as creating the promoted vector value is very cheap then.  */
25076   if (issetmem && alg == vector_loop && val_exp != const0_rtx)
25077     alg = unrolled_loop;
25078 
25079   if (!count)
25080     count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
25081   destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
25082   if (!issetmem)
25083     srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
25084 
25085   unroll_factor = 1;
25086   move_mode = word_mode;
25087   switch (alg)
25088     {
25089     case libcall:
25090     case no_stringop:
25091     case last_alg:
25092       gcc_unreachable ();
25093     case loop_1_byte:
25094       need_zero_guard = true;
25095       move_mode = QImode;
25096       break;
25097     case loop:
25098       need_zero_guard = true;
25099       break;
25100     case unrolled_loop:
25101       need_zero_guard = true;
25102       unroll_factor = (TARGET_64BIT ? 4 : 2);
25103       break;
25104     case vector_loop:
25105       need_zero_guard = true;
25106       unroll_factor = 4;
25107       /* Find the widest supported mode.  */
25108       move_mode = word_mode;
25109       while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
25110 	     != CODE_FOR_nothing)
25111 	  move_mode = GET_MODE_WIDER_MODE (move_mode);
25112 
25113       /* Find the corresponding vector mode with the same size as MOVE_MODE.
25114 	 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.).  */
25115       if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
25116 	{
25117 	  int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
25118 	  move_mode = mode_for_vector (word_mode, nunits);
25119 	  if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
25120 	    move_mode = word_mode;
25121 	}
25122       gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
25123       break;
25124     case rep_prefix_8_byte:
25125       move_mode = DImode;
25126       break;
25127     case rep_prefix_4_byte:
25128       move_mode = SImode;
25129       break;
25130     case rep_prefix_1_byte:
25131       move_mode = QImode;
25132       break;
25133     }
25134   size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
25135   epilogue_size_needed = size_needed;
25136 
25137   /* If we are going to make any library calls conditionally, make sure any
25138      pending stack adjustment happens before the first conditional branch;
25139      otherwise it will be emitted on the library call path only and won't
25140      happen on the other branches.  */
25141   if (dynamic_check != -1)
25142     do_pending_stack_adjust ();
25143 
25144   desired_align = decide_alignment (align, alg, expected_size, move_mode);
25145   if (!TARGET_ALIGN_STRINGOPS || noalign)
25146     align = desired_align;
25147 
25148   /* Step 1: Prologue guard.  */
25149 
25150   /* The alignment code needs the count to be in a register.  */
25151   if (CONST_INT_P (count_exp) && desired_align > align)
25152     {
25153       if (INTVAL (count_exp) > desired_align
25154 	  && INTVAL (count_exp) > size_needed)
25155 	{
25156 	  align_bytes
25157 	    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
25158 	  if (align_bytes <= 0)
25159 	    align_bytes = 0;
25160 	  else
25161 	    align_bytes = desired_align - align_bytes;
25162 	}
25163       if (align_bytes == 0)
25164 	count_exp = force_reg (counter_mode (count_exp), count_exp);
25165     }
25166   gcc_assert (desired_align >= 1 && align >= 1);
25167 
25168   /* Misaligned move sequences handle both prologue and epilogue at once.
25169      Default code generation results in smaller code for large alignments
25170      and also avoids redundant work when sizes are known precisely.  */
25171   misaligned_prologue_used
25172     = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
25173        && MAX (desired_align, epilogue_size_needed) <= 32
25174        && desired_align <= epilogue_size_needed
25175        && ((desired_align > align && !align_bytes)
25176 	   || (!count && epilogue_size_needed > 1)));
25177 
25178   /* Do the cheap promotion to allow better CSE across the
25179      main loop and epilogue (i.e. one load of the big constant in
25180      front of all the code).
25181      For now the misaligned move sequences do not have a fast path
25182      without broadcasting.  */
25183   if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
25184     {
25185       if (alg == vector_loop)
25186 	{
25187 	  gcc_assert (val_exp == const0_rtx);
25188 	  vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
25189 	  promoted_val = promote_duplicated_reg_to_size (val_exp,
25190 							 GET_MODE_SIZE (word_mode),
25191 							 desired_align, align);
25192 	}
25193       else
25194 	{
25195 	  promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25196 							 desired_align, align);
25197 	}
25198     }
25199   /* Misaligned move sequences handle both prologues and epilogues at once.
25200      Default code generation results in smaller code for large alignments and
25201      also avoids redundant work when sizes are known precisely.  */
25202   if (misaligned_prologue_used)
25203     {
25204       /* The misaligned move prologue handles small blocks by itself.  */
25205       expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
25206 	   (dst, src, &destreg, &srcreg,
25207 	    move_mode, promoted_val, vec_promoted_val,
25208 	    &count_exp,
25209 	    &jump_around_label,
25210             desired_align < align
25211 	    ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
25212 	    desired_align, align, &min_size, dynamic_check, issetmem);
25213       if (!issetmem)
25214         src = change_address (src, BLKmode, srcreg);
25215       dst = change_address (dst, BLKmode, destreg);
25216       set_mem_align (dst, desired_align * BITS_PER_UNIT);
25217       epilogue_size_needed = 0;
25218       if (need_zero_guard
25219 	  && min_size < (unsigned HOST_WIDE_INT) size_needed)
25220 	{
25221 	  /* It is possible that we copied enough so the main loop will not
25222 	     execute.  */
25223 	  gcc_assert (size_needed > 1);
25224 	  if (jump_around_label == NULL_RTX)
25225 	    jump_around_label = gen_label_rtx ();
25226 	  emit_cmp_and_jump_insns (count_exp,
25227 				   GEN_INT (size_needed),
25228 				   LTU, 0, counter_mode (count_exp), 1, jump_around_label);
25229 	  if (expected_size == -1
25230 	      || expected_size < (desired_align - align) / 2 + size_needed)
25231 	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
25232 	  else
25233 	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
25234 	}
25235     }
25236   /* Ensure that the alignment prologue won't copy past the end of the block.  */
25237   else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
25238     {
25239       epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
25240       /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
25241 	 Make sure it is power of 2.  */
25242       epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
25243 
25244       /* To improve performance of small blocks, we jump around the VAL
25245 	 promoting code.  This means that if the promoted VAL is not constant,
25246 	 we might not use it in the epilogue and have to use the byte
25247 	 loop variant.  */
25248       if (issetmem && epilogue_size_needed > 2 && !promoted_val)
25249 	force_loopy_epilogue = true;
25250       if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25251 	  || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25252 	{
25253 	  /* If the main algorithm works on QImode, no epilogue is needed.
25254 	     For small sizes just don't align anything.  */
25255 	  if (size_needed == 1)
25256 	    desired_align = align;
25257 	  else
25258 	    goto epilogue;
25259 	}
25260       else if (!count
25261 	       && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25262 	{
25263 	  label = gen_label_rtx ();
25264 	  emit_cmp_and_jump_insns (count_exp,
25265 				   GEN_INT (epilogue_size_needed),
25266 				   LTU, 0, counter_mode (count_exp), 1, label);
25267 	  if (expected_size == -1 || expected_size < epilogue_size_needed)
25268 	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
25269 	  else
25270 	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
25271 	}
25272     }
25273 
25274   /* Emit code to decide at runtime whether a library call or inline code
25275      should be used.  */
25276   if (dynamic_check != -1)
25277     {
25278       if (!issetmem && CONST_INT_P (count_exp))
25279 	{
25280 	  if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
25281 	    {
25282 	      emit_block_move_via_libcall (dst, src, count_exp, false);
25283 	      count_exp = const0_rtx;
25284 	      goto epilogue;
25285 	    }
25286 	}
25287       else
25288 	{
25289 	  rtx_code_label *hot_label = gen_label_rtx ();
25290 	  if (jump_around_label == NULL_RTX)
25291 	    jump_around_label = gen_label_rtx ();
25292 	  emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
25293 				   LEU, 0, counter_mode (count_exp),
25294 				   1, hot_label);
25295 	  predict_jump (REG_BR_PROB_BASE * 90 / 100);
25296 	  if (issetmem)
25297 	    set_storage_via_libcall (dst, count_exp, val_exp, false);
25298 	  else
25299 	    emit_block_move_via_libcall (dst, src, count_exp, false);
25300 	  emit_jump (jump_around_label);
25301 	  emit_label (hot_label);
25302 	}
25303     }
25304 
25305   /* Step 2: Alignment prologue.  */
25306   /* Do the expensive promotion once we have branched off the small blocks.  */
25307   if (issetmem && !promoted_val)
25308     promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25309 						   desired_align, align);
25310 
25311   if (desired_align > align && !misaligned_prologue_used)
25312     {
25313       if (align_bytes == 0)
25314 	{
25315 	  /* Except for the first move in the prologue, we no longer know
25316 	     the constant offset in the aliasing info.  It doesn't seem worth
25317 	     the pain to maintain it for the first move, so throw away
25318 	     the info early.  */
25319 	  dst = change_address (dst, BLKmode, destreg);
25320 	  if (!issetmem)
25321 	    src = change_address (src, BLKmode, srcreg);
25322 	  dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25323 					    promoted_val, vec_promoted_val,
25324 					    count_exp, align, desired_align,
25325 					    issetmem);
25326 	  /* At most desired_align - align bytes are copied.  */
25327 	  if (min_size < (unsigned)(desired_align - align))
25328 	    min_size = 0;
25329 	  else
25330 	    min_size -= desired_align - align;
25331 	}
25332       else
25333 	{
25334 	  /* If we know how many bytes need to be stored before dst is
25335 	     sufficiently aligned, maintain aliasing info accurately.  */
25336 	  dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25337 							   srcreg,
25338 							   promoted_val,
25339 							   vec_promoted_val,
25340 							   desired_align,
25341 							   align_bytes,
25342 							   issetmem);
25343 
25344 	  count_exp = plus_constant (counter_mode (count_exp),
25345 				     count_exp, -align_bytes);
25346 	  count -= align_bytes;
25347 	  min_size -= align_bytes;
25348 	  max_size -= align_bytes;
25349 	}
25350       if (need_zero_guard
25351 	  && min_size < (unsigned HOST_WIDE_INT) size_needed
25352 	  && (count < (unsigned HOST_WIDE_INT) size_needed
25353 	      || (align_bytes == 0
25354 		  && count < ((unsigned HOST_WIDE_INT) size_needed
25355 			      + desired_align - align))))
25356 	{
25357 	  /* It is possible that we copied enough so the main loop will not
25358 	     execute.  */
25359 	  gcc_assert (size_needed > 1);
25360 	  if (label == NULL_RTX)
25361 	    label = gen_label_rtx ();
25362 	  emit_cmp_and_jump_insns (count_exp,
25363 				   GEN_INT (size_needed),
25364 				   LTU, 0, counter_mode (count_exp), 1, label);
25365 	  if (expected_size == -1
25366 	      || expected_size < (desired_align - align) / 2 + size_needed)
25367 	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
25368 	  else
25369 	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
25370 	}
25371     }
25372   if (label && size_needed == 1)
25373     {
25374       emit_label (label);
25375       LABEL_NUSES (label) = 1;
25376       label = NULL;
25377       epilogue_size_needed = 1;
25378       if (issetmem)
25379 	promoted_val = val_exp;
25380     }
25381   else if (label == NULL_RTX && !misaligned_prologue_used)
25382     epilogue_size_needed = size_needed;
25383 
25384   /* Step 3: Main loop.  */
25385 
25386   switch (alg)
25387     {
25388     case libcall:
25389     case no_stringop:
25390     case last_alg:
25391       gcc_unreachable ();
25392     case loop_1_byte:
25393     case loop:
25394     case unrolled_loop:
25395       expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25396 				     count_exp, move_mode, unroll_factor,
25397 				     expected_size, issetmem);
25398       break;
25399     case vector_loop:
25400       expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25401 				     vec_promoted_val, count_exp, move_mode,
25402 				     unroll_factor, expected_size, issetmem);
25403       break;
25404     case rep_prefix_8_byte:
25405     case rep_prefix_4_byte:
25406     case rep_prefix_1_byte:
25407       expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25408 				       val_exp, count_exp, move_mode, issetmem);
25409       break;
25410     }
25411   /* Properly adjust the offsets of the src and dest memory for aliasing.  */
25412   if (CONST_INT_P (count_exp))
25413     {
25414       if (!issetmem)
25415 	src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25416 					    (count / size_needed) * size_needed);
25417       dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25418 					  (count / size_needed) * size_needed);
25419     }
25420   else
25421     {
25422       if (!issetmem)
25423 	src = change_address (src, BLKmode, srcreg);
25424       dst = change_address (dst, BLKmode, destreg);
25425     }
25426 
25427   /* Step 4: Epilogue to copy the remaining bytes.  */
25428  epilogue:
25429   if (label)
25430     {
25431       /* When the main loop is done, COUNT_EXP might hold the original count,
25432 	 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
25433 	 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
25434 	 bytes.  Compensate if needed.  */
25435 
25436       if (size_needed < epilogue_size_needed)
25437 	{
25438 	  tmp =
25439 	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25440 				 GEN_INT (size_needed - 1), count_exp, 1,
25441 				 OPTAB_DIRECT);
25442 	  if (tmp != count_exp)
25443 	    emit_move_insn (count_exp, tmp);
25444 	}
25445       emit_label (label);
25446       LABEL_NUSES (label) = 1;
25447     }
25448 
25449   if (count_exp != const0_rtx && epilogue_size_needed > 1)
25450     {
25451       if (force_loopy_epilogue)
25452 	expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25453 					 epilogue_size_needed);
25454       else
25455 	{
25456 	  if (issetmem)
25457 	    expand_setmem_epilogue (dst, destreg, promoted_val,
25458 				    vec_promoted_val, count_exp,
25459 				    epilogue_size_needed);
25460 	  else
25461 	    expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25462 				    epilogue_size_needed);
25463 	}
25464     }
25465   if (jump_around_label)
25466     emit_label (jump_around_label);
25467   return true;
25468 }
25469 
25470 
25471 /* Expand the appropriate insns for doing strlen if not just doing
25472    repnz; scasb
25473 
25474    out = result, initialized with the start address
25475    align_rtx = alignment of the address.
25476    scratch = scratch register, initialized with the start address when
25477 	not aligned, otherwise undefined
25478 
25479    This is just the body.  It needs the initializations mentioned above and
25480    some address computation at the end.  These things are done in i386.md.  */
25481 
25482 static void
25483 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25484 {
25485   int align;
25486   rtx tmp;
25487   rtx_code_label *align_2_label = NULL;
25488   rtx_code_label *align_3_label = NULL;
25489   rtx_code_label *align_4_label = gen_label_rtx ();
25490   rtx_code_label *end_0_label = gen_label_rtx ();
25491   rtx mem;
25492   rtx tmpreg = gen_reg_rtx (SImode);
25493   rtx scratch = gen_reg_rtx (SImode);
25494   rtx cmp;
25495 
25496   align = 0;
25497   if (CONST_INT_P (align_rtx))
25498     align = INTVAL (align_rtx);
25499 
25500   /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
25501 
25502   /* Is there a known alignment and is it less than 4?  */
25503   if (align < 4)
25504     {
25505       rtx scratch1 = gen_reg_rtx (Pmode);
25506       emit_move_insn (scratch1, out);
25507       /* Is there a known alignment and is it not 2? */
25508       if (align != 2)
25509 	{
25510 	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte.  */
25511 	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte.  */
25512 
25513 	  /* Leave just the 3 lower bits.  */
25514 	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25515 				    NULL_RTX, 0, OPTAB_WIDEN);
25516 
25517 	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25518 				   Pmode, 1, align_4_label);
25519 	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25520 				   Pmode, 1, align_2_label);
25521 	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25522 				   Pmode, 1, align_3_label);
25523 	}
25524       else
25525         {
25526 	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
25527 	     check whether it is 4-byte aligned.  */
25528 
25529 	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25530 				    NULL_RTX, 0, OPTAB_WIDEN);
25531 
25532 	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25533 				   Pmode, 1, align_4_label);
25534         }
25535 
25536       mem = change_address (src, QImode, out);
25537 
25538       /* Now compare the bytes.  */
25539 
25540       /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
25541       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25542 			       QImode, 1, end_0_label);
25543 
25544       /* Increment the address.  */
25545       emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25546 
25547       /* Not needed with an alignment of 2.  */
25548       if (align != 2)
25549 	{
25550 	  emit_label (align_2_label);
25551 
25552 	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25553 				   end_0_label);
25554 
25555 	  emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25556 
25557 	  emit_label (align_3_label);
25558 	}
25559 
25560       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25561 			       end_0_label);
25562 
25563       emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25564     }
25565 
25566   /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
25567      align this loop; that only enlarges the program and does not help
25568      speed.  */
25569   emit_label (align_4_label);
25570 
25571   mem = change_address (src, SImode, out);
25572   emit_move_insn (scratch, mem);
25573   emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25574 
25575   /* This formula yields a nonzero result iff one of the bytes is zero.
25576      This saves three branches inside the loop and many cycles.  */
25577 
25578   emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25579   emit_insn (gen_one_cmplsi2 (scratch, scratch));
25580   emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25581   emit_insn (gen_andsi3 (tmpreg, tmpreg,
25582 			 gen_int_mode (0x80808080, SImode)));
25583   emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25584 			   align_4_label);
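
  /* In plain C the test emitted above reads, for a 32-bit word X
     (an illustrative sketch):

       ((x - 0x01010101) & ~x & 0x80808080) != 0

     A byte of X - 0x01010101 gets its top bit set when the corresponding
     byte of X was zero; ANDing with ~X discards bytes whose top bit was
     already set in X, so the result is nonzero iff some byte of X is
     zero.  */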
25585 
25586   if (TARGET_CMOVE)
25587     {
25588        rtx reg = gen_reg_rtx (SImode);
25589        rtx reg2 = gen_reg_rtx (Pmode);
25590        emit_move_insn (reg, tmpreg);
25591        emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25592 
25593        /* If zero is not in the first two bytes, move two bytes forward.  */
25594        emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25595        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25596        tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25597        emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
25598 			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
25599 						     reg,
25600 						     tmpreg)));
25601        /* Emit the lea manually to avoid clobbering the flags.  */
25602        emit_insn (gen_rtx_SET (SImode, reg2,
25603 			       gen_rtx_PLUS (Pmode, out, const2_rtx)));
25604 
25605        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25606        tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25607        emit_insn (gen_rtx_SET (VOIDmode, out,
25608 			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25609 						     reg2,
25610 						     out)));
25611     }
25612   else
25613     {
25614        rtx_code_label *end_2_label = gen_label_rtx ();
25615        /* Is zero in the first two bytes? */
25616 
25617        emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25618        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25619        tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25620        tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25621                             gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25622                             pc_rtx);
25623        tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25624        JUMP_LABEL (tmp) = end_2_label;
25625 
25626        /* Not in the first two.  Move two bytes forward.  */
25627        emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25628        emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25629 
25630        emit_label (end_2_label);
25631 
25632     }
25633 
25634   /* Avoid branch in fixing the byte.  */
25635   tmpreg = gen_lowpart (QImode, tmpreg);
25636   emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
25637   tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25638   cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25639   emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25640 
25641   emit_label (end_0_label);
25642 }
25643 
25644 /* Expand strlen.  */
25645 
25646 bool
25647 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25648 {
25649   rtx addr, scratch1, scratch2, scratch3, scratch4;
25650 
  /* The generic case of the strlen expander is long.  Avoid expanding
     it unless TARGET_INLINE_ALL_STRINGOPS.  */
25653 
25654   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25655       && !TARGET_INLINE_ALL_STRINGOPS
25656       && !optimize_insn_for_size_p ()
25657       && (!CONST_INT_P (align) || INTVAL (align) < 4))
25658     return false;
25659 
25660   addr = force_reg (Pmode, XEXP (src, 0));
25661   scratch1 = gen_reg_rtx (Pmode);
25662 
25663   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25664       && !optimize_insn_for_size_p ())
25665     {
      /* It seems that some optimizers do not combine a call like
         foo(strlen(bar), strlen(bar));
         when the move and the subtraction are done here.  The length is
         calculated just once when these instructions are done inside of
         output_strlen_unroll().  But since &bar[strlen(bar)] is often
         used and this uses one fewer register for the lifetime of
         output_strlen_unroll(), this is better.  */
25673 
25674       emit_move_insn (out, addr);
25675 
25676       ix86_expand_strlensi_unroll_1 (out, src, align);
25677 
25678       /* strlensi_unroll_1 returns the address of the zero at the end of
25679          the string, like memchr(), so compute the length by subtracting
25680          the start address.  */
25681       emit_insn (ix86_gen_sub3 (out, out, addr));
25682     }
25683   else
25684     {
25685       rtx unspec;
25686 
25687       /* Can't use this if the user has appropriated eax, ecx, or edi.  */
25688       if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25689         return false;
25690 
25691       scratch2 = gen_reg_rtx (Pmode);
25692       scratch3 = gen_reg_rtx (Pmode);
25693       scratch4 = force_reg (Pmode, constm1_rtx);
25694 
25695       emit_move_insn (scratch3, addr);
25696       eoschar = force_reg (QImode, eoschar);
25697 
25698       src = replace_equiv_address_nv (src, scratch3);
25699 
25700       /* If .md starts supporting :P, this can be done in .md.  */
25701       unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25702 						 scratch4), UNSPEC_SCAS);
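      /* The count passed in via scratch4 starts at -1 and is decremented
	 once per byte scanned, including the terminator; e.g. for "abc"
	 the scan result is -5, so out = ~(-5) - 1 = 3 = strlen ("abc").  */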
25703       emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25704       emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25705       emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25706     }
25707   return true;
25708 }
25709 
/* For a given symbol (function), construct code to compute the address
   of its PLT entry in the large x86-64 PIC model.  */
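/* The PLT entry address is the GOT base plus symbol@PLTOFF, so the
   emitted sequence amounts to roughly
	movabs	$symbol@PLTOFF, %tmp
	add	%gotbase, %tmp
   where %tmp and %gotbase stand in for the allocated register and the
   PIC offset table register.  */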
25712 static rtx
25713 construct_plt_address (rtx symbol)
25714 {
25715   rtx tmp, unspec;
25716 
25717   gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25718   gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25719   gcc_assert (Pmode == DImode);
25720 
25721   tmp = gen_reg_rtx (Pmode);
25722   unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25723 
25724   emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25725   emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25726   return tmp;
25727 }
25728 
25729 rtx
25730 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25731 		  rtx callarg2,
25732 		  rtx pop, bool sibcall)
25733 {
25734   rtx vec[3];
25735   rtx use = NULL, call;
25736   unsigned int vec_len = 0;
25737 
25738   if (pop == const0_rtx)
25739     pop = NULL;
25740   gcc_assert (!TARGET_64BIT || !pop);
25741 
25742   if (TARGET_MACHO && !TARGET_64BIT)
25743     {
25744 #if TARGET_MACHO
25745       if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25746 	fnaddr = machopic_indirect_call_target (fnaddr);
25747 #endif
25748     }
25749   else
25750     {
25751       /* Static functions and indirect calls don't need the pic register.  */
25752       if (flag_pic
25753 	  && (!TARGET_64BIT
25754 	      || (ix86_cmodel == CM_LARGE_PIC
25755 		  && DEFAULT_ABI != MS_ABI))
25756 	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25757 	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25758 	{
25759 	  use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25760 	  if (ix86_use_pseudo_pic_reg ())
25761 	    emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25762 			    pic_offset_table_rtx);
25763 	}
25764     }
25765 
25766   /* Skip setting up RAX register for -mskip-rax-setup when there are no
25767      parameters passed in vector registers.  */
25768   if (TARGET_64BIT
25769       && (INTVAL (callarg2) > 0
25770 	  || (INTVAL (callarg2) == 0
25771 	      && (TARGET_SSE || !flag_skip_rax_setup))))
25772     {
25773       rtx al = gen_rtx_REG (QImode, AX_REG);
25774       emit_move_insn (al, callarg2);
25775       use_reg (&use, al);
25776     }
25777 
25778   if (ix86_cmodel == CM_LARGE_PIC
25779       && !TARGET_PECOFF
25780       && MEM_P (fnaddr)
25781       && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25782       && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25783     fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25784   else if (sibcall
25785 	   ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25786 	   : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25787     {
25788       fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25789       fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25790     }
25791 
25792   call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25793 
25794   if (retval)
25795     {
      /* We should add bounds as the destination register in case a
	 pointer with bounds may be returned.  */
25798       if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25799 	{
25800 	  rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25801 	  rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25802 	  if (GET_CODE (retval) == PARALLEL)
25803 	    {
25804 	      b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
25805 	      b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
25806 	      rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
25807 	      retval = chkp_join_splitted_slot (retval, par);
25808 	    }
25809 	  else
25810 	    {
25811 	      retval = gen_rtx_PARALLEL (VOIDmode,
25812 					 gen_rtvec (3, retval, b0, b1));
25813 	      chkp_put_regs_to_expr_list (retval);
25814 	    }
25815 	}
25816 
25817       call = gen_rtx_SET (VOIDmode, retval, call);
25818     }
25819   vec[vec_len++] = call;
25820 
25821   if (pop)
25822     {
25823       pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25824       pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25825       vec[vec_len++] = pop;
25826     }
25827 
25828   if (TARGET_64BIT_MS_ABI
25829       && (!callarg2 || INTVAL (callarg2) != -2))
25830     {
25831       int const cregs_size
25832 	= ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25833       int i;
25834 
25835       for (i = 0; i < cregs_size; i++)
25836 	{
25837 	  int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25838 	  machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25839 
25840 	  clobber_reg (&use, gen_rtx_REG (mode, regno));
25841 	}
25842     }
25843 
25844   if (vec_len > 1)
25845     call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25846   call = emit_call_insn (call);
25847   if (use)
25848     CALL_INSN_FUNCTION_USAGE (call) = use;
25849 
25850   return call;
25851 }
25852 
25853 /* Output the assembly for a call instruction.  */
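/* E.g. a direct call is printed via the "%P0" template as "call foo"
   and an indirect one via "%A0" as "call *%rax"; the "%!" prefix
   expands to "bnd " when MPX-prefixed calls are in use.  */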
25854 
25855 const char *
25856 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25857 {
25858   bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25859   bool seh_nop_p = false;
25860   const char *xasm;
25861 
25862   if (SIBLING_CALL_P (insn))
25863     {
25864       if (direct_p)
25865 	xasm = "%!jmp\t%P0";
25866       /* SEH epilogue detection requires the indirect branch case
25867 	 to include REX.W.  */
25868       else if (TARGET_SEH)
25869 	xasm = "%!rex.W jmp %A0";
25870       else
25871 	xasm = "%!jmp\t%A0";
25872 
25873       output_asm_insn (xasm, &call_op);
25874       return "";
25875     }
25876 
25877   /* SEH unwinding can require an extra nop to be emitted in several
25878      circumstances.  Determine if we have one of those.  */
25879   if (TARGET_SEH)
25880     {
25881       rtx_insn *i;
25882 
25883       for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25884 	{
25885 	  /* If we get to another real insn, we don't need the nop.  */
25886 	  if (INSN_P (i))
25887 	    break;
25888 
	  /* If we get to the epilogue note, prevent a catch region from
	     being adjacent to the standard epilogue sequence.  If
	     non-call-exceptions is in effect, we'll have done this during
	     epilogue emission.  */
25892 	  if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25893 	      && !flag_non_call_exceptions
25894 	      && !can_throw_internal (insn))
25895 	    {
25896 	      seh_nop_p = true;
25897 	      break;
25898 	    }
25899 	}
25900 
25901       /* If we didn't find a real insn following the call, prevent the
25902 	 unwinder from looking into the next function.  */
25903       if (i == NULL)
25904 	seh_nop_p = true;
25905     }
25906 
25907   if (direct_p)
25908     xasm = "%!call\t%P0";
25909   else
25910     xasm = "%!call\t%A0";
25911 
25912   output_asm_insn (xasm, &call_op);
25913 
25914   if (seh_nop_p)
25915     return "nop";
25916 
25917   return "";
25918 }
25919 
25920 /* Clear stack slot assignments remembered from previous functions.
25921    This is called from INIT_EXPANDERS once before RTL is emitted for each
25922    function.  */
25923 
25924 static struct machine_function *
25925 ix86_init_machine_status (void)
25926 {
25927   struct machine_function *f;
25928 
25929   f = ggc_cleared_alloc<machine_function> ();
25930   f->use_fast_prologue_epilogue_nregs = -1;
25931   f->call_abi = ix86_abi;
25932 
25933   return f;
25934 }
25935 
25936 /* Return a MEM corresponding to a stack slot with mode MODE.
25937    Allocate a new slot if necessary.
25938 
25939    The RTL for a function can have several slots available: N is
25940    which slot to use.  */
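/* E.g. two assign_386_stack_local (SImode, SLOT_TEMP) calls within one
   function hand back the same slot, while a DImode request for the same
   N allocates a new one, since the lookup below is keyed on (mode, n).  */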
25941 
25942 rtx
25943 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25944 {
25945   struct stack_local_entry *s;
25946 
25947   gcc_assert (n < MAX_386_STACK_LOCALS);
25948 
25949   for (s = ix86_stack_locals; s; s = s->next)
25950     if (s->mode == mode && s->n == n)
25951       return validize_mem (copy_rtx (s->rtl));
25952 
25953   s = ggc_alloc<stack_local_entry> ();
25954   s->n = n;
25955   s->mode = mode;
25956   s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25957 
25958   s->next = ix86_stack_locals;
25959   ix86_stack_locals = s;
25960   return validize_mem (copy_rtx (s->rtl));
25961 }
25962 
25963 static void
25964 ix86_instantiate_decls (void)
25965 {
25966   struct stack_local_entry *s;
25967 
25968   for (s = ix86_stack_locals; s; s = s->next)
25969     if (s->rtl != NULL_RTX)
25970       instantiate_decl_rtl (s->rtl);
25971 }
25972 
25973 /* Check whether x86 address PARTS is a pc-relative address.  */
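/* E.g. a bare symbol or label displacement such as foo(%rip), possibly
   plus a constant offset, qualifies; any address with a base or index
   register does not.  */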
25974 
25975 static bool
25976 rip_relative_addr_p (struct ix86_address *parts)
25977 {
25978   rtx base, index, disp;
25979 
25980   base = parts->base;
25981   index = parts->index;
25982   disp = parts->disp;
25983 
25984   if (disp && !base && !index)
25985     {
25986       if (TARGET_64BIT)
25987 	{
25988 	  rtx symbol = disp;
25989 
25990 	  if (GET_CODE (disp) == CONST)
25991 	    symbol = XEXP (disp, 0);
25992 	  if (GET_CODE (symbol) == PLUS
25993 	      && CONST_INT_P (XEXP (symbol, 1)))
25994 	    symbol = XEXP (symbol, 0);
25995 
25996 	  if (GET_CODE (symbol) == LABEL_REF
25997 	      || (GET_CODE (symbol) == SYMBOL_REF
25998 		  && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25999 	      || (GET_CODE (symbol) == UNSPEC
26000 		  && (XINT (symbol, 1) == UNSPEC_GOTPCREL
26001 		      || XINT (symbol, 1) == UNSPEC_PCREL
26002 		      || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
26003 	    return true;
26004 	}
26005     }
26006   return false;
26007 }
26008 
/* Calculate the length of the memory address in the instruction encoding.
   Includes the addr32 prefix; does not include the one-byte modrm,
   opcode, or other prefixes.  We never generate an addr32 prefix for
   the LEA insn.  */
26012 
26013 int
26014 memory_address_length (rtx addr, bool lea)
26015 {
26016   struct ix86_address parts;
26017   rtx base, index, disp;
26018   int len;
26019   int ok;
26020 
26021   if (GET_CODE (addr) == PRE_DEC
26022       || GET_CODE (addr) == POST_INC
26023       || GET_CODE (addr) == PRE_MODIFY
26024       || GET_CODE (addr) == POST_MODIFY)
26025     return 0;
26026 
26027   ok = ix86_decompose_address (addr, &parts);
26028   gcc_assert (ok);
26029 
26030   len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
26031 
  /* If this is not an LEA instruction, add the length of the addr32
     prefix.  */
26033   if (TARGET_64BIT && !lea
26034       && (SImode_address_operand (addr, VOIDmode)
26035 	  || (parts.base && GET_MODE (parts.base) == SImode)
26036 	  || (parts.index && GET_MODE (parts.index) == SImode)))
26037     len++;
26038 
26039   base = parts.base;
26040   index = parts.index;
26041   disp = parts.disp;
26042 
26043   if (base && GET_CODE (base) == SUBREG)
26044     base = SUBREG_REG (base);
26045   if (index && GET_CODE (index) == SUBREG)
26046     index = SUBREG_REG (index);
26047 
26048   gcc_assert (base == NULL_RTX || REG_P (base));
26049   gcc_assert (index == NULL_RTX || REG_P (index));
26050 
26051   /* Rule of thumb:
26052        - esp as the base always wants an index,
26053        - ebp as the base always wants a displacement,
26054        - r12 as the base always wants an index,
26055        - r13 as the base always wants a displacement.  */
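  /* E.g. (%esp) costs one extra byte for its SIB byte and (%ebp) one
     for a zero disp8, 4(%eax) needs a one-byte displacement, and
     0x1234(%eax,%ebx) needs a SIB byte plus a four-byte displacement.  */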
26056 
26057   /* Register Indirect.  */
26058   if (base && !index && !disp)
26059     {
26060       /* esp (for its index) and ebp (for its displacement) need
26061 	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
26062 	 code.  */
26063       if (base == arg_pointer_rtx
26064 	  || base == frame_pointer_rtx
26065 	  || REGNO (base) == SP_REG
26066 	  || REGNO (base) == BP_REG
26067 	  || REGNO (base) == R12_REG
26068 	  || REGNO (base) == R13_REG)
26069 	len++;
26070     }
26071 
26072   /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
26073      is not disp32, but disp32(%rip), so for disp32
26074      SIB byte is needed, unless print_operand_address
26075      optimizes it into disp32(%rip) or (%rip) is implied
26076      by UNSPEC.  */
26077   else if (disp && !base && !index)
26078     {
26079       len += 4;
      if (TARGET_64BIT && !rip_relative_addr_p (&parts))
26081 	len++;
26082     }
26083   else
26084     {
26085       /* Find the length of the displacement constant.  */
26086       if (disp)
26087 	{
26088 	  if (base && satisfies_constraint_K (disp))
26089 	    len += 1;
26090 	  else
26091 	    len += 4;
26092 	}
26093       /* ebp always wants a displacement.  Similarly r13.  */
26094       else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
26095 	len++;
26096 
26097       /* An index requires the two-byte modrm form....  */
26098       if (index
26099 	  /* ...like esp (or r12), which always wants an index.  */
26100 	  || base == arg_pointer_rtx
26101 	  || base == frame_pointer_rtx
26102 	  || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
26103 	len++;
26104     }
26105 
26106   return len;
26107 }
26108 
/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate
   alternative.  */
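/* E.g. "add $3, %eax" can use the sign-extended imm8 form and gets
   length 1, while "add $300, %eax" needs the full imm32 and gets
   length 4.  */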
26111 int
26112 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
26113 {
26114   int len = 0;
26115   int i;
26116   extract_insn_cached (insn);
26117   for (i = recog_data.n_operands - 1; i >= 0; --i)
26118     if (CONSTANT_P (recog_data.operand[i]))
26119       {
26120         enum attr_mode mode = get_attr_mode (insn);
26121 
26122 	gcc_assert (!len);
26123 	if (shortform && CONST_INT_P (recog_data.operand[i]))
26124 	  {
26125 	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
26126 	    switch (mode)
26127 	      {
26128 	      case MODE_QI:
26129 		len = 1;
26130 		continue;
26131 	      case MODE_HI:
26132 		ival = trunc_int_for_mode (ival, HImode);
26133 		break;
26134 	      case MODE_SI:
26135 		ival = trunc_int_for_mode (ival, SImode);
26136 		break;
26137 	      default:
26138 		break;
26139 	      }
26140 	    if (IN_RANGE (ival, -128, 127))
26141 	      {
26142 		len = 1;
26143 		continue;
26144 	      }
26145 	  }
26146 	switch (mode)
26147 	  {
26148 	  case MODE_QI:
26149 	    len = 1;
26150 	    break;
26151 	  case MODE_HI:
26152 	    len = 2;
26153 	    break;
26154 	  case MODE_SI:
26155 	    len = 4;
26156 	    break;
	  /* Immediates for DImode instructions are encoded
	     as 32-bit sign-extended values.  */
26159 	  case MODE_DI:
26160 	    len = 4;
26161 	    break;
26162 	  default:
26163 	    fatal_insn ("unknown insn mode", insn);
26164 	}
26165       }
26166   return len;
26167 }
26168 
26169 /* Compute default value for "length_address" attribute.  */
26170 int
26171 ix86_attr_length_address_default (rtx_insn *insn)
26172 {
26173   int i;
26174 
26175   if (get_attr_type (insn) == TYPE_LEA)
26176     {
26177       rtx set = PATTERN (insn), addr;
26178 
26179       if (GET_CODE (set) == PARALLEL)
26180 	set = XVECEXP (set, 0, 0);
26181 
26182       gcc_assert (GET_CODE (set) == SET);
26183 
26184       addr = SET_SRC (set);
26185 
26186       return memory_address_length (addr, true);
26187     }
26188 
26189   extract_insn_cached (insn);
26190   for (i = recog_data.n_operands - 1; i >= 0; --i)
26191     if (MEM_P (recog_data.operand[i]))
26192       {
26193         constrain_operands_cached (insn, reload_completed);
26194         if (which_alternative != -1)
26195 	  {
26196 	    const char *constraints = recog_data.constraints[i];
26197 	    int alt = which_alternative;
26198 
26199 	    while (*constraints == '=' || *constraints == '+')
26200 	      constraints++;
26201 	    while (alt-- > 0)
26202 	      while (*constraints++ != ',')
26203 		;
26204 	    /* Skip ignored operands.  */
26205 	    if (*constraints == 'X')
26206 	      continue;
26207 	  }
26208 	return memory_address_length (XEXP (recog_data.operand[i], 0), false);
26209       }
26210   return 0;
26211 }
26212 
/* Compute the default value for the "length_vex" attribute.  It includes
   the 2- or 3-byte VEX prefix and the 1-byte opcode.  */
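/* E.g. vaddps (%rax), %xmm1, %xmm0 fits the 2-byte prefix (3 including
   the opcode byte), while vaddps (%r8), %xmm1, %xmm0 needs VEX.B and
   therefore the 3-byte prefix (4 including the opcode byte).  */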
26215 
26216 int
26217 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
26218 			      bool has_vex_w)
26219 {
26220   int i;
26221 
  /* Only the 0f opcode map can use the 2-byte VEX prefix; the VEX.W bit
     requires the 3-byte VEX prefix.  */
26224   if (!has_0f_opcode || has_vex_w)
26225     return 3 + 1;
26226 
  /* We can always use the 2-byte VEX prefix in 32-bit mode.  */
26228   if (!TARGET_64BIT)
26229     return 2 + 1;
26230 
26231   extract_insn_cached (insn);
26232 
26233   for (i = recog_data.n_operands - 1; i >= 0; --i)
26234     if (REG_P (recog_data.operand[i]))
26235       {
	/* The REX.W bit requires the 3-byte VEX prefix.  */
26237 	if (GET_MODE (recog_data.operand[i]) == DImode
26238 	    && GENERAL_REG_P (recog_data.operand[i]))
26239 	  return 3 + 1;
26240       }
26241     else
26242       {
	/* The REX.X or REX.B bits require the 3-byte VEX prefix.  */
26244 	if (MEM_P (recog_data.operand[i])
26245 	    && x86_extended_reg_mentioned_p (recog_data.operand[i]))
26246 	  return 3 + 1;
26247       }
26248 
26249   return 2 + 1;
26250 }
26251 
26252 /* Return the maximum number of instructions a cpu can issue.  */
26253 
26254 static int
26255 ix86_issue_rate (void)
26256 {
26257   switch (ix86_tune)
26258     {
26259     case PROCESSOR_PENTIUM:
26260     case PROCESSOR_BONNELL:
26261     case PROCESSOR_SILVERMONT:
26262     case PROCESSOR_KNL:
26263     case PROCESSOR_INTEL:
26264     case PROCESSOR_K6:
26265     case PROCESSOR_BTVER2:
26266     case PROCESSOR_PENTIUM4:
26267     case PROCESSOR_NOCONA:
26268       return 2;
26269 
26270     case PROCESSOR_PENTIUMPRO:
26271     case PROCESSOR_ATHLON:
26272     case PROCESSOR_K8:
26273     case PROCESSOR_AMDFAM10:
26274     case PROCESSOR_GENERIC:
26275     case PROCESSOR_BTVER1:
26276       return 3;
26277 
26278     case PROCESSOR_BDVER1:
26279     case PROCESSOR_BDVER2:
26280     case PROCESSOR_BDVER3:
26281     case PROCESSOR_BDVER4:
26282     case PROCESSOR_CORE2:
26283     case PROCESSOR_NEHALEM:
26284     case PROCESSOR_SANDYBRIDGE:
26285     case PROCESSOR_HASWELL:
26286       return 4;
26287 
26288     default:
26289       return 1;
26290     }
26291 }
26292 
/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the
   flags set by DEP_INSN and nothing else set by DEP_INSN.  */
26295 
26296 static bool
26297 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
26298 {
26299   rtx set, set2;
26300 
26301   /* Simplify the test for uninteresting insns.  */
26302   if (insn_type != TYPE_SETCC
26303       && insn_type != TYPE_ICMOV
26304       && insn_type != TYPE_FCMOV
26305       && insn_type != TYPE_IBR)
26306     return false;
26307 
26308   if ((set = single_set (dep_insn)) != 0)
26309     {
26310       set = SET_DEST (set);
26311       set2 = NULL_RTX;
26312     }
26313   else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
26314 	   && XVECLEN (PATTERN (dep_insn), 0) == 2
26315 	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
26316 	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
26317     {
26318       set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
26320     }
26321   else
26322     return false;
26323 
26324   if (!REG_P (set) || REGNO (set) != FLAGS_REG)
26325     return false;
26326 
26327   /* This test is true if the dependent insn reads the flags but
26328      not any other potentially set register.  */
26329   if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26330     return false;
26331 
26332   if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
26333     return false;
26334 
26335   return true;
26336 }
26337 
26338 /* Return true iff USE_INSN has a memory address with operands set by
26339    SET_INSN.  */
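/* E.g. "addl $4, %eax" followed by "movl (%eax), %edx" is such a
   dependence: the load's address register is modified by the first
   insn.  */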
26340 
26341 bool
26342 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26343 {
26344   int i;
26345   extract_insn_cached (use_insn);
26346   for (i = recog_data.n_operands - 1; i >= 0; --i)
26347     if (MEM_P (recog_data.operand[i]))
26348       {
26349 	rtx addr = XEXP (recog_data.operand[i], 0);
26350 	return modified_in_p (addr, set_insn) != 0;
26351       }
26352   return false;
26353 }
26354 
26355 /* Helper function for exact_store_load_dependency.
26356    Return true if addr is found in insn.  */
26357 static bool
26358 exact_dependency_1 (rtx addr, rtx insn)
26359 {
26360   enum rtx_code code;
26361   const char *format_ptr;
26362   int i, j;
26363 
26364   code = GET_CODE (insn);
26365   switch (code)
26366     {
26367     case MEM:
26368       if (rtx_equal_p (addr, insn))
26369 	return true;
26370       break;
26371     case REG:
26372     CASE_CONST_ANY:
26373     case SYMBOL_REF:
26374     case CODE_LABEL:
26375     case PC:
26376     case CC0:
26377     case EXPR_LIST:
26378       return false;
26379     default:
26380       break;
26381     }
26382 
26383   format_ptr = GET_RTX_FORMAT (code);
26384   for (i = 0; i < GET_RTX_LENGTH (code); i++)
26385     {
26386       switch (*format_ptr++)
26387 	{
26388 	case 'e':
26389 	  if (exact_dependency_1 (addr, XEXP (insn, i)))
26390 	    return true;
26391 	  break;
26392 	case 'E':
26393 	  for (j = 0; j < XVECLEN (insn, i); j++)
26394 	    if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26395 	      return true;
	  break;
26397 	}
26398     }
26399   return false;
26400 }
26401 
26402 /* Return true if there exists exact dependency for store & load, i.e.
26403    the same memory address is used in them.  */
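/* E.g. "movw %ax, 4(%rsp)" followed by "movw 4(%rsp), %bx" qualifies;
   a load from any other address does not.  */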
26404 static bool
26405 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26406 {
26407   rtx set1, set2;
26408 
26409   set1 = single_set (store);
26410   if (!set1)
26411     return false;
26412   if (!MEM_P (SET_DEST (set1)))
26413     return false;
26414   set2 = single_set (load);
26415   if (!set2)
26416     return false;
26417   if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26418     return true;
26419   return false;
26420 }
26421 
26422 static int
26423 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26424 {
26425   enum attr_type insn_type, dep_insn_type;
26426   enum attr_memory memory;
26427   rtx set, set2;
26428   int dep_insn_code_number;
26429 
26430   /* Anti and output dependencies have zero cost on all CPUs.  */
26431   if (REG_NOTE_KIND (link) != 0)
26432     return 0;
26433 
26434   dep_insn_code_number = recog_memoized (dep_insn);
26435 
26436   /* If we can't recognize the insns, we can't really do anything.  */
26437   if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26438     return cost;
26439 
26440   insn_type = get_attr_type (insn);
26441   dep_insn_type = get_attr_type (dep_insn);
26442 
26443   switch (ix86_tune)
26444     {
26445     case PROCESSOR_PENTIUM:
26446       /* Address Generation Interlock adds a cycle of latency.  */
26447       if (insn_type == TYPE_LEA)
26448 	{
26449 	  rtx addr = PATTERN (insn);
26450 
26451 	  if (GET_CODE (addr) == PARALLEL)
26452 	    addr = XVECEXP (addr, 0, 0);
26453 
26454 	  gcc_assert (GET_CODE (addr) == SET);
26455 
26456 	  addr = SET_SRC (addr);
26457 	  if (modified_in_p (addr, dep_insn))
26458 	    cost += 1;
26459 	}
26460       else if (ix86_agi_dependent (dep_insn, insn))
26461 	cost += 1;
26462 
26463       /* ??? Compares pair with jump/setcc.  */
26464       if (ix86_flags_dependent (insn, dep_insn, insn_type))
26465 	cost = 0;
26466 
      /* Floating point stores require the value to be ready one cycle
	 earlier.  */
26468       if (insn_type == TYPE_FMOV
26469 	  && get_attr_memory (insn) == MEMORY_STORE
26470 	  && !ix86_agi_dependent (dep_insn, insn))
26471 	cost += 1;
26472       break;
26473 
26474     case PROCESSOR_PENTIUMPRO:
26475       /* INT->FP conversion is expensive.  */
26476       if (get_attr_fp_int_src (dep_insn))
26477 	cost += 5;
26478 
26479       /* There is one cycle extra latency between an FP op and a store.  */
26480       if (insn_type == TYPE_FMOV
26481 	  && (set = single_set (dep_insn)) != NULL_RTX
26482 	  && (set2 = single_set (insn)) != NULL_RTX
26483 	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26484 	  && MEM_P (SET_DEST (set2)))
26485 	cost += 1;
26486 
26487       memory = get_attr_memory (insn);
26488 
      /* Show the reorder buffer's ability to hide the latency of a load
	 by executing it in parallel with the previous instruction, when
	 the previous instruction is not needed to compute the address.  */
26492       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26493 	  && !ix86_agi_dependent (dep_insn, insn))
26494 	{
	  /* Claim that moves take one cycle, as the core can issue one
	     load at a time and the next load can start a cycle later.  */
26497 	  if (dep_insn_type == TYPE_IMOV
26498 	      || dep_insn_type == TYPE_FMOV)
26499 	    cost = 1;
26500 	  else if (cost > 1)
26501 	    cost--;
26502 	}
26503       break;
26504 
26505     case PROCESSOR_K6:
26506      /* The esp dependency is resolved before
26507 	the instruction is really finished.  */
26508       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26509 	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26510 	return 1;
26511 
26512       /* INT->FP conversion is expensive.  */
26513       if (get_attr_fp_int_src (dep_insn))
26514 	cost += 5;
26515 
26516       memory = get_attr_memory (insn);
26517 
      /* Show the reorder buffer's ability to hide the latency of a load
	 by executing it in parallel with the previous instruction, when
	 the previous instruction is not needed to compute the address.  */
26521       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26522 	  && !ix86_agi_dependent (dep_insn, insn))
26523 	{
	  /* Claim that moves take one cycle, as the core can issue one
	     load at a time and the next load can start a cycle later.  */
26526 	  if (dep_insn_type == TYPE_IMOV
26527 	      || dep_insn_type == TYPE_FMOV)
26528 	    cost = 1;
26529 	  else if (cost > 2)
26530 	    cost -= 2;
26531 	  else
26532 	    cost = 1;
26533 	}
26534       break;
26535 
26536     case PROCESSOR_AMDFAM10:
26537     case PROCESSOR_BDVER1:
26538     case PROCESSOR_BDVER2:
26539     case PROCESSOR_BDVER3:
26540     case PROCESSOR_BDVER4:
26541     case PROCESSOR_BTVER1:
26542     case PROCESSOR_BTVER2:
26543     case PROCESSOR_GENERIC:
      /* The stack engine allows push and pop instructions to execute
	 in parallel.  */
26545       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26546 	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26547 	return 0;
26548       /* FALLTHRU */
26549 
26550     case PROCESSOR_ATHLON:
26551     case PROCESSOR_K8:
26552       memory = get_attr_memory (insn);
26553 
      /* Show the reorder buffer's ability to hide the latency of a load
	 by executing it in parallel with the previous instruction, when
	 the previous instruction is not needed to compute the address.  */
26557       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26558 	  && !ix86_agi_dependent (dep_insn, insn))
26559 	{
26560 	  enum attr_unit unit = get_attr_unit (insn);
26561 	  int loadcost = 3;
26562 
	  /* Because of the difference between the length of integer and
	     floating unit pipeline preparation stages, the memory operands
	     for floating point are cheaper.

	     ??? For Athlon the difference is most probably 2.  */
26568 	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26569 	    loadcost = 3;
26570 	  else
26571 	    loadcost = TARGET_ATHLON ? 2 : 0;
26572 
26573 	  if (cost >= loadcost)
26574 	    cost -= loadcost;
26575 	  else
26576 	    cost = 0;
26577 	}
26578       break;
26579 
26580     case PROCESSOR_CORE2:
26581     case PROCESSOR_NEHALEM:
26582     case PROCESSOR_SANDYBRIDGE:
26583     case PROCESSOR_HASWELL:
      /* The stack engine allows push and pop instructions to execute
	 in parallel.  */
26585       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26586 	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26587 	return 0;
26588 
26589       memory = get_attr_memory (insn);
26590 
      /* Show the reorder buffer's ability to hide the latency of a load
	 by executing it in parallel with the previous instruction, when
	 the previous instruction is not needed to compute the address.  */
26594       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26595 	  && !ix86_agi_dependent (dep_insn, insn))
26596 	{
26597 	  if (cost >= 4)
26598 	    cost -= 4;
26599 	  else
26600 	    cost = 0;
26601 	}
26602       break;
26603 
26604     case PROCESSOR_SILVERMONT:
26605     case PROCESSOR_KNL:
26606     case PROCESSOR_INTEL:
26607       if (!reload_completed)
26608 	return cost;
26609 
26610       /* Increase cost of integer loads.  */
26611       memory = get_attr_memory (dep_insn);
26612       if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26613 	{
26614 	  enum attr_unit unit = get_attr_unit (dep_insn);
26615 	  if (unit == UNIT_INTEGER && cost == 1)
26616 	    {
26617 	      if (memory == MEMORY_LOAD)
26618 		cost = 3;
26619 	      else
26620 		{
26621 		  /* Increase cost of ld/st for short int types only
26622 		     because of store forwarding issue.  */
26623 		  rtx set = single_set (dep_insn);
26624 		  if (set && (GET_MODE (SET_DEST (set)) == QImode
26625 			      || GET_MODE (SET_DEST (set)) == HImode))
26626 		    {
		      /* Increase the cost of the store/load insn if an
			 exact dependence exists and it is a load insn.  */
26629 		      enum attr_memory insn_memory = get_attr_memory (insn);
26630 		      if (insn_memory == MEMORY_LOAD
26631 			  && exact_store_load_dependency (dep_insn, insn))
26632 			cost = 3;
26633 		    }
26634 		}
26635 	    }
	}
      break;

26638     default:
26639       break;
26640     }
26641 
26642   return cost;
26643 }
26644 
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */
26648 
26649 static int
26650 ia32_multipass_dfa_lookahead (void)
26651 {
26652   switch (ix86_tune)
26653     {
26654     case PROCESSOR_PENTIUM:
26655       return 2;
26656 
26657     case PROCESSOR_PENTIUMPRO:
26658     case PROCESSOR_K6:
26659       return 1;
26660 
26661     case PROCESSOR_BDVER1:
26662     case PROCESSOR_BDVER2:
26663     case PROCESSOR_BDVER3:
26664     case PROCESSOR_BDVER4:
      /* We use lookahead value 4 for BD both before and after reload
	 schedules.  The plan is to use value 8 for -O3.  */
      return 4;
26668 
26669     case PROCESSOR_CORE2:
26670     case PROCESSOR_NEHALEM:
26671     case PROCESSOR_SANDYBRIDGE:
26672     case PROCESSOR_HASWELL:
26673     case PROCESSOR_BONNELL:
26674     case PROCESSOR_SILVERMONT:
26675     case PROCESSOR_KNL:
26676     case PROCESSOR_INTEL:
      /* Generally, we want haifa-sched:max_issue() to look ahead as far
	 as the number of instructions that can be executed in a cycle,
	 i.e., issue_rate.  I wonder why tuning for many CPUs does not
	 do this.  */
26680       if (reload_completed)
26681         return ix86_issue_rate ();
26682       /* Don't use lookahead for pre-reload schedule to save compile time.  */
26683       return 0;
26684 
26685     default:
26686       return 0;
26687     }
26688 }
26689 
26690 /* Return true if target platform supports macro-fusion.  */
26691 
26692 static bool
26693 ix86_macro_fusion_p ()
26694 {
26695   return TARGET_FUSE_CMP_AND_BRANCH;
26696 }
26697 
/* Check whether the current microarchitecture supports macro fusion
   for the insn pair "CONDGEN + CONDJMP".  Refer to
   "Intel Architectures Optimization Reference Manual".  */
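/* E.g. "cmp %rax, %rbx; jne .L2" can fuse, while "cmpl $1, (%rax);
   jne .L2" (MEM-IMM compare) and "dec %eax; jae .L2" (inc/dec with an
   unsigned branch) are rejected below.  */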
26701 
26702 static bool
26703 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26704 {
26705   rtx src, dest;
26706   enum rtx_code ccode;
26707   rtx compare_set = NULL_RTX, test_if, cond;
26708   rtx alu_set = NULL_RTX, addr = NULL_RTX;
26709 
26710   if (!any_condjump_p (condjmp))
26711     return false;
26712 
26713   if (get_attr_type (condgen) != TYPE_TEST
26714       && get_attr_type (condgen) != TYPE_ICMP
26715       && get_attr_type (condgen) != TYPE_INCDEC
26716       && get_attr_type (condgen) != TYPE_ALU)
26717     return false;
26718 
26719   compare_set = single_set (condgen);
26720   if (compare_set == NULL_RTX
26721       && !TARGET_FUSE_ALU_AND_BRANCH)
26722     return false;
26723 
26724   if (compare_set == NULL_RTX)
26725     {
26726       int i;
26727       rtx pat = PATTERN (condgen);
26728       for (i = 0; i < XVECLEN (pat, 0); i++)
26729 	if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26730 	  {
26731 	    rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26732 	    if (GET_CODE (set_src) == COMPARE)
26733 	      compare_set = XVECEXP (pat, 0, i);
26734 	    else
26735 	      alu_set = XVECEXP (pat, 0, i);
26736 	  }
26737     }
26738   if (compare_set == NULL_RTX)
26739     return false;
26740   src = SET_SRC (compare_set);
26741   if (GET_CODE (src) != COMPARE)
26742     return false;
26743 
26744   /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26745      supported.  */
26746   if ((MEM_P (XEXP (src, 0))
26747        && CONST_INT_P (XEXP (src, 1)))
26748       || (MEM_P (XEXP (src, 1))
26749 	  && CONST_INT_P (XEXP (src, 0))))
26750     return false;
26751 
26752   /* No fusion for RIP-relative address.  */
26753   if (MEM_P (XEXP (src, 0)))
26754     addr = XEXP (XEXP (src, 0), 0);
26755   else if (MEM_P (XEXP (src, 1)))
26756     addr = XEXP (XEXP (src, 1), 0);
26757 
  if (addr)
    {
      ix86_address parts;
      int ok = ix86_decompose_address (addr, &parts);
      gcc_assert (ok);

      if (rip_relative_addr_p (&parts))
	return false;
    }
26766 
26767   test_if = SET_SRC (pc_set (condjmp));
26768   cond = XEXP (test_if, 0);
26769   ccode = GET_CODE (cond);
  /* Check whether the conditional jump uses the Sign or Overflow flags.  */
26771   if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26772       && (ccode == GE
26773           || ccode == GT
26774 	  || ccode == LE
26775 	  || ccode == LT))
26776     return false;
26777 
26778   /* Return true for TYPE_TEST and TYPE_ICMP.  */
26779   if (get_attr_type (condgen) == TYPE_TEST
26780       || get_attr_type (condgen) == TYPE_ICMP)
26781     return true;
26782 
  /* The following handles the macro-fusion case for ALU + jmp.  */
26784   if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26785     return false;
26786 
26787   /* No fusion for alu op with memory destination operand.  */
26788   dest = SET_DEST (alu_set);
26789   if (MEM_P (dest))
26790     return false;
26791 
26792   /* Macro-fusion for inc/dec + unsigned conditional jump is not
26793      supported.  */
26794   if (get_attr_type (condgen) == TYPE_INCDEC
26795       && (ccode == GEU
26796 	  || ccode == GTU
26797 	  || ccode == LEU
26798 	  || ccode == LTU))
26799     return false;
26800 
26801   return true;
26802 }
26803 
/* Try to reorder the ready list to take advantage of Atom pipelined IMUL
   execution.  It is applied if
   (1) an IMUL instruction is on the top of the list;
   (2) there exists exactly one producer of an independent IMUL
       instruction in the ready list.
   Return the index of the IMUL producer if it was found and -1
   otherwise.  */
26810 static int
26811 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26812 {
26813   rtx_insn *insn;
26814   rtx set, insn1, insn2;
26815   sd_iterator_def sd_it;
26816   dep_t dep;
26817   int index = -1;
26818   int i;
26819 
26820   if (!TARGET_BONNELL)
26821     return index;
26822 
26823   /* Check that IMUL instruction is on the top of ready list.  */
26824   insn = ready[n_ready - 1];
26825   set = single_set (insn);
26826   if (!set)
26827     return index;
26828   if (!(GET_CODE (SET_SRC (set)) == MULT
26829       && GET_MODE (SET_SRC (set)) == SImode))
26830     return index;
26831 
26832   /* Search for producer of independent IMUL instruction.  */
26833   for (i = n_ready - 2; i >= 0; i--)
26834     {
26835       insn = ready[i];
26836       if (!NONDEBUG_INSN_P (insn))
26837 	continue;
26838       /* Skip IMUL instruction.  */
26839       insn2 = PATTERN (insn);
26840       if (GET_CODE (insn2) == PARALLEL)
26841 	insn2 = XVECEXP (insn2, 0, 0);
26842       if (GET_CODE (insn2) == SET
26843 	  && GET_CODE (SET_SRC (insn2)) == MULT
26844 	  && GET_MODE (SET_SRC (insn2)) == SImode)
26845 	continue;
26846 
26847       FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26848 	{
26849 	  rtx con;
26850 	  con = DEP_CON (dep);
26851 	  if (!NONDEBUG_INSN_P (con))
26852 	    continue;
26853 	  insn1 = PATTERN (con);
26854 	  if (GET_CODE (insn1) == PARALLEL)
26855 	    insn1 = XVECEXP (insn1, 0, 0);
26856 
26857 	  if (GET_CODE (insn1) == SET
26858 	      && GET_CODE (SET_SRC (insn1)) == MULT
26859 	      && GET_MODE (SET_SRC (insn1)) == SImode)
26860 	    {
26861 	      sd_iterator_def sd_it1;
26862 	      dep_t dep1;
	      /* Check that there is no other dependee for the IMUL.  */
26864 	      index = i;
26865 	      FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26866 		{
26867 		  rtx pro;
26868 		  pro = DEP_PRO (dep1);
26869 		  if (!NONDEBUG_INSN_P (pro))
26870 		    continue;
26871 		  if (pro != insn)
26872 		    index = -1;
26873 		}
26874 	      if (index >= 0)
26875 		break;
26876 	    }
26877 	}
26878       if (index >= 0)
26879 	break;
26880     }
26881   return index;
26882 }
26883 
/* Try to find the best candidate for the top of the ready list when two
   insns have the same priority - the candidate is best if its dependees
   were scheduled earlier.  Applied for Silvermont/Intel only.
   Return true if the top 2 insns must be interchanged.  */
26888 static bool
26889 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26890 {
26891   rtx_insn *top = ready[n_ready - 1];
26892   rtx_insn *next = ready[n_ready - 2];
26893   rtx set;
26894   sd_iterator_def sd_it;
26895   dep_t dep;
26896   int clock1 = -1;
26897   int clock2 = -1;
26898   #define INSN_TICK(INSN) (HID (INSN)->tick)
26899 
26900   if (!TARGET_SILVERMONT && !TARGET_INTEL)
26901     return false;
26902 
26903   if (!NONDEBUG_INSN_P (top))
26904     return false;
26905   if (!NONJUMP_INSN_P (top))
26906     return false;
26907   if (!NONDEBUG_INSN_P (next))
26908     return false;
26909   if (!NONJUMP_INSN_P (next))
26910     return false;
26911   set = single_set (top);
26912   if (!set)
26913     return false;
26914   set = single_set (next);
26915   if (!set)
26916     return false;
26917 
26918   if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26919     {
26920       if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26921 	return false;
      /* Determine the winner more precisely.  */
26923       FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26924 	{
26925 	  rtx pro;
26926 	  pro = DEP_PRO (dep);
26927 	  if (!NONDEBUG_INSN_P (pro))
26928 	    continue;
26929 	  if (INSN_TICK (pro) > clock1)
26930 	    clock1 = INSN_TICK (pro);
26931 	}
26932       FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26933 	{
26934 	  rtx pro;
26935 	  pro = DEP_PRO (dep);
26936 	  if (!NONDEBUG_INSN_P (pro))
26937 	    continue;
26938 	  if (INSN_TICK (pro) > clock2)
26939 	    clock2 = INSN_TICK (pro);
26940 	}
26941 
26942       if (clock1 == clock2)
26943 	{
	  /* Determine the winner - a load must win.  */
26945 	  enum attr_memory memory1, memory2;
26946 	  memory1 = get_attr_memory (top);
26947 	  memory2 = get_attr_memory (next);
26948 	  if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26949 	    return true;
26950 	}
      return (bool) (clock2 < clock1);
26952     }
26953   return false;
26954   #undef INSN_TICK
26955 }
/* Perform possible reordering of the ready list for Atom/Silvermont
   only.  Return the issue rate.  */
26958    Return issue rate.  */
26959 static int
26960 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26961 		    int *pn_ready, int clock_var)
26962 {
26963   int issue_rate = -1;
26964   int n_ready = *pn_ready;
26965   int i;
26966   rtx_insn *insn;
26967   int index = -1;
26968 
26969   /* Set up issue rate.  */
26970   issue_rate = ix86_issue_rate ();
26971 
  /* Do reordering for BONNELL/SILVERMONT/INTEL only.  */
26973   if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26974     return issue_rate;
26975 
26976   /* Nothing to do if ready list contains only 1 instruction.  */
26977   if (n_ready <= 1)
26978     return issue_rate;
26979 
  /* Do reordering for the post-reload scheduler only.  */
26981   if (!reload_completed)
26982     return issue_rate;
26983 
26984   if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26985     {
26986       if (sched_verbose > 1)
26987 	fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26988 		 INSN_UID (ready[index]));
26989 
26990       /* Put IMUL producer (ready[index]) at the top of ready list.  */
26991       insn = ready[index];
26992       for (i = index; i < n_ready - 1; i++)
26993 	ready[i] = ready[i + 1];
26994       ready[n_ready - 1] = insn;
26995       return issue_rate;
26996     }
26997 
26998   /* Skip selective scheduling since HID is not populated in it.  */
26999   if (clock_var != 0
27000       && !sel_sched_p ()
27001       && swap_top_of_ready_list (ready, n_ready))
27002     {
27003       if (sched_verbose > 1)
27004 	fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
27005 		 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
27006       /* Swap 2 top elements of ready list.  */
27007       insn = ready[n_ready - 1];
27008       ready[n_ready - 1] = ready[n_ready - 2];
27009       ready[n_ready - 2] = insn;
27010     }
27011   return issue_rate;
27012 }
27013 
27014 static bool
27015 ix86_class_likely_spilled_p (reg_class_t);
27016 
/* Return true if the lhs of INSN is a HW function argument register and
   set *is_spilled to true if it is a likely-spilled HW register.  */
27019 static bool
27020 insn_is_function_arg (rtx insn, bool* is_spilled)
27021 {
27022   rtx dst;
27023 
27024   if (!NONDEBUG_INSN_P (insn))
27025     return false;
  /* Call instructions are not movable; ignore them.  */
27027   if (CALL_P (insn))
27028     return false;
27029   insn = PATTERN (insn);
27030   if (GET_CODE (insn) == PARALLEL)
27031     insn = XVECEXP (insn, 0, 0);
27032   if (GET_CODE (insn) != SET)
27033     return false;
27034   dst = SET_DEST (insn);
27035   if (REG_P (dst) && HARD_REGISTER_P (dst)
27036       && ix86_function_arg_regno_p (REGNO (dst)))
27037     {
27038       /* Is it likely spilled HW register?  */
27039       if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
27040 	  && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
27041 	*is_spilled = true;
27042       return true;
27043     }
27044   return false;
27045 }
27046 
/* Add output dependencies for a chain of adjacent function arguments,
   but only if there is a move to a likely-spilled HW register.  Return
   the first argument if at least one dependence was added, or NULL
   otherwise.  */
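/* Schematically, for a sequence like
	mov	%r14, %rsi	(arg 2 in likely-spilled SIREG)
	mov	%r15, %rdi	(arg 1 in likely-spilled DIREG)
	call	foo
   an output dependence is added between the two argument moves so that
   the scheduler keeps the chain together.  */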
27050 static rtx_insn *
27051 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
27052 {
27053   rtx_insn *insn;
27054   rtx_insn *last = call;
27055   rtx_insn *first_arg = NULL;
27056   bool is_spilled = false;
27057 
27058   head = PREV_INSN (head);
27059 
  /* Find the argument-passing instruction nearest to the call.  */
27061   while (true)
27062     {
27063       last = PREV_INSN (last);
27064       if (last == head)
27065 	return NULL;
27066       if (!NONDEBUG_INSN_P (last))
27067 	continue;
27068       if (insn_is_function_arg (last, &is_spilled))
27069 	break;
27070       return NULL;
27071     }
27072 
27073   first_arg = last;
27074   while (true)
27075     {
27076       insn = PREV_INSN (last);
27077       if (!INSN_P (insn))
27078 	break;
27079       if (insn == head)
27080 	break;
27081       if (!NONDEBUG_INSN_P (insn))
27082 	{
27083 	  last = insn;
27084 	  continue;
27085 	}
27086       if (insn_is_function_arg (insn, &is_spilled))
27087 	{
	  /* Add an output dependence between two function arguments if
	     the chain of output arguments contains likely-spilled HW
	     registers.  */
27090 	  if (is_spilled)
27091 	    add_dependence (first_arg, insn, REG_DEP_OUTPUT);
27092 	  first_arg = last = insn;
27093 	}
27094       else
27095 	break;
27096     }
27097   if (!is_spilled)
27098     return NULL;
27099   return first_arg;
27100 }
27101 
27102 /* Add output or anti dependency from insn to first_arg to restrict its code
27103    motion.  */
27104 static void
27105 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
27106 {
27107   rtx set;
27108   rtx tmp;
27109 
27110   /* Add anti dependencies for bounds stores.  */
27111   if (INSN_P (insn)
27112       && GET_CODE (PATTERN (insn)) == PARALLEL
27113       && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
27114       && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
27115     {
27116       add_dependence (first_arg, insn, REG_DEP_ANTI);
27117       return;
27118     }
27119 
27120   set = single_set (insn);
27121   if (!set)
27122     return;
27123   tmp = SET_DEST (set);
27124   if (REG_P (tmp))
27125     {
27126       /* Add output dependency to the first function argument.  */
27127       add_dependence (first_arg, insn, REG_DEP_OUTPUT);
27128       return;
27129     }
27130   /* Add anti dependency.  */
27131   add_dependence (first_arg, insn, REG_DEP_ANTI);
27132 }
27133 
/* Avoid cross-block motion of a function argument by adding a dependency
   from the first non-jump instruction in bb.  */
27136 static void
27137 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
27138 {
27139   rtx_insn *insn = BB_END (bb);
27140 
27141   while (insn)
27142     {
27143       if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
27144 	{
27145 	  rtx set = single_set (insn);
27146 	  if (set)
27147 	    {
27148 	      avoid_func_arg_motion (arg, insn);
27149 	      return;
27150 	    }
27151 	}
27152       if (insn == BB_HEAD (bb))
27153 	return;
27154       insn = PREV_INSN (insn);
27155     }
27156 }
27157 
27158 /* Hook for pre-reload schedule - avoid motion of function arguments
27159    passed in likely spilled HW registers.  */
27160 static void
27161 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
27162 {
27163   rtx_insn *insn;
27164   rtx_insn *first_arg = NULL;
27165   if (reload_completed)
27166     return;
27167   while (head != tail && DEBUG_INSN_P (head))
27168     head = NEXT_INSN (head);
27169   for (insn = tail; insn != head; insn = PREV_INSN (insn))
27170     if (INSN_P (insn) && CALL_P (insn))
27171       {
27172 	first_arg = add_parameter_dependencies (insn, head);
27173 	if (first_arg)
27174 	  {
	    /* Add a dependee for the first argument to predecessors, but
	       only if the region contains more than one block.  */
	    basic_block bb = BLOCK_FOR_INSN (insn);
27178 	    int rgn = CONTAINING_RGN (bb->index);
27179 	    int nr_blks = RGN_NR_BLOCKS (rgn);
27180 	    /* Skip trivial regions and region head blocks that can have
27181 	       predecessors outside of region.  */
27182 	    if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
27183 	      {
27184 		edge e;
27185 		edge_iterator ei;
27186 
27187 		/* Regions are SCCs with the exception of selective
27188 		   scheduling with pipelining of outer blocks enabled.
27189 		   So also check that immediate predecessors of a non-head
27190 		   block are in the same region.  */
27191 		FOR_EACH_EDGE (e, ei, bb->preds)
27192 		  {
		    /* Avoid creating loop-carried dependencies by using
		       the topological ordering in the region.  */
27195 		    if (rgn == CONTAINING_RGN (e->src->index)
27196 			&& BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
27197 		      add_dependee_for_func_arg (first_arg, e->src);
27198 		  }
27199 	      }
27200 	    insn = first_arg;
27201 	    if (insn == head)
27202 	      break;
27203 	  }
27204       }
27205     else if (first_arg)
27206       avoid_func_arg_motion (first_arg, insn);
27207 }
27208 
/* Hook for pre-reload schedule - set the priority of moves from
   likely-spilled HW registers to the maximum, to schedule them as soon
   as possible.  These are moves from function argument registers at the
   top of the function entry and moves from function return value
   registers after call.  */
27213 static int
27214 ix86_adjust_priority (rtx_insn *insn, int priority)
27215 {
27216   rtx set;
27217 
27218   if (reload_completed)
27219     return priority;
27220 
27221   if (!NONDEBUG_INSN_P (insn))
27222     return priority;
27223 
27224   set = single_set (insn);
27225   if (set)
27226     {
27227       rtx tmp = SET_SRC (set);
27228       if (REG_P (tmp)
27229           && HARD_REGISTER_P (tmp)
27230           && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
27231           && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
27232 	return current_sched_info->sched_max_insns_priority;
27233     }
27234 
27235   return priority;
27236 }
27237 
27238 /* Model decoder of Core 2/i7.
27239    Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
27240    track the instruction fetch block boundaries and make sure that long
27241    (9+ bytes) instructions are assigned to D0.  */
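/* E.g. with the 16-byte ifetch block modeled below, insns of 7, 7 and
   5 bytes cannot all issue in one cycle: the third overflows the block
   (7 + 7 + 5 > 16) and is filtered from the ready list until the next
   cycle.  */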
27242 
27243 /* Maximum length of an insn that can be handled by
27244    a secondary decoder unit.  '8' for Core 2/i7.  */
27245 static int core2i7_secondary_decoder_max_insn_size;
27246 
27247 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
27248    '16' for Core 2/i7.  */
27249 static int core2i7_ifetch_block_size;
27250 
27251 /* Maximum number of instructions decoder can handle per cycle.
27252    '6' for Core 2/i7.  */
27253 static int core2i7_ifetch_block_max_insns;
27254 
27255 typedef struct ix86_first_cycle_multipass_data_ *
27256   ix86_first_cycle_multipass_data_t;
27257 typedef const struct ix86_first_cycle_multipass_data_ *
27258   const_ix86_first_cycle_multipass_data_t;
27259 
27260 /* A variable to store target state across calls to max_issue within
27261    one cycle.  */
27262 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
27263   *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
27264 
27265 /* Initialize DATA.  */
27266 static void
27267 core2i7_first_cycle_multipass_init (void *_data)
27268 {
27269   ix86_first_cycle_multipass_data_t data
27270     = (ix86_first_cycle_multipass_data_t) _data;
27271 
27272   data->ifetch_block_len = 0;
27273   data->ifetch_block_n_insns = 0;
27274   data->ready_try_change = NULL;
27275   data->ready_try_change_size = 0;
27276 }
27277 
27278 /* Advancing the cycle; reset ifetch block counts.  */
27279 static void
27280 core2i7_dfa_post_advance_cycle (void)
27281 {
27282   ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
27283 
27284   gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27285 
27286   data->ifetch_block_len = 0;
27287   data->ifetch_block_n_insns = 0;
27288 }
27289 
27290 static int min_insn_size (rtx_insn *);
27291 
/* Filter out insns from ready_try that the core will not be able to
   issue on the current cycle due to decoder restrictions.  */
27294 static void
27295 core2i7_first_cycle_multipass_filter_ready_try
27296 (const_ix86_first_cycle_multipass_data_t data,
27297  signed char *ready_try, int n_ready, bool first_cycle_insn_p)
27298 {
27299   while (n_ready--)
27300     {
27301       rtx_insn *insn;
27302       int insn_size;
27303 
27304       if (ready_try[n_ready])
27305 	continue;
27306 
27307       insn = get_ready_element (n_ready);
27308       insn_size = min_insn_size (insn);
27309 
      if (/* If this is too long an insn for a secondary decoder ...  */
27311 	  (!first_cycle_insn_p
27312 	   && insn_size > core2i7_secondary_decoder_max_insn_size)
27313 	  /* ... or it would not fit into the ifetch block ...  */
27314 	  || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
27315 	  /* ... or the decoder is full already ...  */
27316 	  || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
27317 	/* ... mask the insn out.  */
27318 	{
27319 	  ready_try[n_ready] = 1;
27320 
27321 	  if (data->ready_try_change)
27322 	    bitmap_set_bit (data->ready_try_change, n_ready);
27323 	}
27324     }
27325 }
27326 
27327 /* Prepare for a new round of multipass lookahead scheduling.  */
27328 static void
27329 core2i7_first_cycle_multipass_begin (void *_data,
27330 				     signed char *ready_try, int n_ready,
27331 				     bool first_cycle_insn_p)
27332 {
27333   ix86_first_cycle_multipass_data_t data
27334     = (ix86_first_cycle_multipass_data_t) _data;
27335   const_ix86_first_cycle_multipass_data_t prev_data
27336     = ix86_first_cycle_multipass_data;
27337 
27338   /* Restore the state from the end of the previous round.  */
27339   data->ifetch_block_len = prev_data->ifetch_block_len;
27340   data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
27341 
27342   /* Filter instructions that cannot be issued on the current cycle due to
27343      decoder restrictions.  */
27344   core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27345 						  first_cycle_insn_p);
27346 }
27347 
27348 /* INSN is being issued in the current solution.  Account for its impact on
27349    the decoder model.  */
27350 static void
27351 core2i7_first_cycle_multipass_issue (void *_data,
27352 				     signed char *ready_try, int n_ready,
27353 				     rtx_insn *insn, const void *_prev_data)
27354 {
27355   ix86_first_cycle_multipass_data_t data
27356     = (ix86_first_cycle_multipass_data_t) _data;
27357   const_ix86_first_cycle_multipass_data_t prev_data
27358     = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27359 
27360   int insn_size = min_insn_size (insn);
27361 
27362   data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27363   data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27364   gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27365 	      && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27366 
27367   /* Allocate or resize the bitmap for storing INSN's effect on ready_try.  */
27368   if (!data->ready_try_change)
27369     {
27370       data->ready_try_change = sbitmap_alloc (n_ready);
27371       data->ready_try_change_size = n_ready;
27372     }
27373   else if (data->ready_try_change_size < n_ready)
27374     {
27375       data->ready_try_change = sbitmap_resize (data->ready_try_change,
27376 					       n_ready, 0);
27377       data->ready_try_change_size = n_ready;
27378     }
27379   bitmap_clear (data->ready_try_change);
27380 
27381   /* Filter out insns from ready_try that the core will not be able to issue
27382      on the current cycle due to decoder restrictions.  */
27383   core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27384 						  false);
27385 }
27386 
27387 /* Revert the effect on ready_try.  */
27388 static void
27389 core2i7_first_cycle_multipass_backtrack (const void *_data,
27390 					 signed char *ready_try,
27391 					 int n_ready ATTRIBUTE_UNUSED)
27392 {
27393   const_ix86_first_cycle_multipass_data_t data
27394     = (const_ix86_first_cycle_multipass_data_t) _data;
27395   unsigned int i = 0;
27396   sbitmap_iterator sbi;
27397 
27398   gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27399   EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27400     {
27401       ready_try[i] = 0;
27402     }
27403 }
27404 
27405 /* Save the result of multipass lookahead scheduling for the next round.  */
27406 static void
27407 core2i7_first_cycle_multipass_end (const void *_data)
27408 {
27409   const_ix86_first_cycle_multipass_data_t data
27410     = (const_ix86_first_cycle_multipass_data_t) _data;
27411   ix86_first_cycle_multipass_data_t next_data
27412     = ix86_first_cycle_multipass_data;
27413 
27414   if (data != NULL)
27415     {
27416       next_data->ifetch_block_len = data->ifetch_block_len;
27417       next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27418     }
27419 }
27420 
27421 /* Deallocate target data.  */
27422 static void
27423 core2i7_first_cycle_multipass_fini (void *_data)
27424 {
27425   ix86_first_cycle_multipass_data_t data
27426     = (ix86_first_cycle_multipass_data_t) _data;
27427 
27428   if (data->ready_try_change)
27429     {
27430       sbitmap_free (data->ready_try_change);
27431       data->ready_try_change = NULL;
27432       data->ready_try_change_size = 0;
27433     }
27434 }
27435 
27436 /* Prepare for scheduling pass.  */
27437 static void
27438 ix86_sched_init_global (FILE *, int, int)
27439 {
27440   /* Install scheduling hooks for the current CPU.  Some of these hooks are used
27441      in time-critical parts of the scheduler, so we only set them up when
27442      they are actually used.  */
27443   switch (ix86_tune)
27444     {
27445     case PROCESSOR_CORE2:
27446     case PROCESSOR_NEHALEM:
27447     case PROCESSOR_SANDYBRIDGE:
27448     case PROCESSOR_HASWELL:
27449       /* Do not perform multipass scheduling during the pre-reload
27450          schedule, to save compile time.  */
27451       if (reload_completed)
27452 	{
27453 	  targetm.sched.dfa_post_advance_cycle
27454 	    = core2i7_dfa_post_advance_cycle;
27455 	  targetm.sched.first_cycle_multipass_init
27456 	    = core2i7_first_cycle_multipass_init;
27457 	  targetm.sched.first_cycle_multipass_begin
27458 	    = core2i7_first_cycle_multipass_begin;
27459 	  targetm.sched.first_cycle_multipass_issue
27460 	    = core2i7_first_cycle_multipass_issue;
27461 	  targetm.sched.first_cycle_multipass_backtrack
27462 	    = core2i7_first_cycle_multipass_backtrack;
27463 	  targetm.sched.first_cycle_multipass_end
27464 	    = core2i7_first_cycle_multipass_end;
27465 	  targetm.sched.first_cycle_multipass_fini
27466 	    = core2i7_first_cycle_multipass_fini;
27467 
27468 	  /* Set decoder parameters.  */
27469 	  core2i7_secondary_decoder_max_insn_size = 8;
27470 	  core2i7_ifetch_block_size = 16;
27471 	  core2i7_ifetch_block_max_insns = 6;
27472 	  break;
27473 	}
27474       /* ... Fall through ...  */
27475     default:
27476       targetm.sched.dfa_post_advance_cycle = NULL;
27477       targetm.sched.first_cycle_multipass_init = NULL;
27478       targetm.sched.first_cycle_multipass_begin = NULL;
27479       targetm.sched.first_cycle_multipass_issue = NULL;
27480       targetm.sched.first_cycle_multipass_backtrack = NULL;
27481       targetm.sched.first_cycle_multipass_end = NULL;
27482       targetm.sched.first_cycle_multipass_fini = NULL;
27483       break;
27484     }
27485 }
27486 
27487 
27488 /* Compute the alignment given to a constant that is being placed in memory.
27489    EXP is the constant and ALIGN is the alignment that the object would
27490    ordinarily have.
27491    The value of this function is used instead of that alignment to align
27492    the object.  */
27493 
27494 int
27495 ix86_constant_alignment (tree exp, int align)
27496 {
27497   if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27498       || TREE_CODE (exp) == INTEGER_CST)
27499     {
27500       if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27501 	return 64;
27502       else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27503 	return 128;
27504     }
27505   else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27506 	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27507     return BITS_PER_WORD;
27508 
27509   return align;
27510 }
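
/* For example (purely illustrative, not additional logic): a DFmode
   constant placed in memory is given 64-bit alignment here even when
   ALIGN is only 32, so that double loads need not straddle an 8-byte
   boundary, and a 128-bit vector constant is promoted to 128-bit
   alignment so aligned SSE loads can be used on it.  Long string
   constants get word alignment when not optimizing for size.  */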
27511 
27512 /* Compute the alignment for a static variable.
27513    TYPE is the data type, and ALIGN is the alignment that
27514    the object would ordinarily have.  The value of this function is used
27515    instead of that alignment to align the object.  */
27516 
27517 int
27518 ix86_data_alignment (tree type, int align, bool opt)
27519 {
27520   /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27521      for symbols from other compilation units or symbols that don't need
27522      to bind locally.  In order to preserve some ABI compatibility with
27523      those compilers, ensure we don't decrease alignment from what we
27524      used to assume.  */
27525 
27526   int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
27527 
27528   /* A data structure equal to or greater than the size of a cache line
27529      (64 bytes on the Pentium 4 and other recent Intel processors, including
27530      processors based on the Intel Core microarchitecture) should be aligned
27531      so that its base address is a multiple of the cache line size.  */
27532 
27533   int max_align
27534     = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27535 
27536   if (max_align < BITS_PER_WORD)
27537     max_align = BITS_PER_WORD;
27538 
27539   switch (ix86_align_data_type)
27540     {
27541     case ix86_align_data_type_abi: opt = false; break;
27542     case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
27543     case ix86_align_data_type_cacheline: break;
27544     }
27545 
27546   if (opt
27547       && AGGREGATE_TYPE_P (type)
27548       && TYPE_SIZE (type)
27549       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27550     {
27551       if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27552 	  && align < max_align_compat)
27553 	align = max_align_compat;
27554        if (wi::geu_p (TYPE_SIZE (type), max_align)
27555 	   && align < max_align)
27556 	 align = max_align;
27557     }
27558 
27559   /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27560      to a 16-byte boundary.  */
27561   if (TARGET_64BIT)
27562     {
27563       if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27564 	  && TYPE_SIZE (type)
27565 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27566 	  && wi::geu_p (TYPE_SIZE (type), 128)
27567 	  && align < 128)
27568 	return 128;
27569     }
27570 
27571   if (!opt)
27572     return align;
27573 
27574   if (TREE_CODE (type) == ARRAY_TYPE)
27575     {
27576       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27577 	return 64;
27578       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27579 	return 128;
27580     }
27581   else if (TREE_CODE (type) == COMPLEX_TYPE)
27582     {
27584       if (TYPE_MODE (type) == DCmode && align < 64)
27585 	return 64;
27586       if ((TYPE_MODE (type) == XCmode
27587 	   || TYPE_MODE (type) == TCmode) && align < 128)
27588 	return 128;
27589     }
27590   else if ((TREE_CODE (type) == RECORD_TYPE
27591 	    || TREE_CODE (type) == UNION_TYPE
27592 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
27593 	   && TYPE_FIELDS (type))
27594     {
27595       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27596 	return 64;
27597       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27598 	return 128;
27599     }
27600   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27601 	   || TREE_CODE (type) == INTEGER_TYPE)
27602     {
27603       if (TYPE_MODE (type) == DFmode && align < 64)
27604 	return 64;
27605       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27606 	return 128;
27607     }
27608 
27609   return align;
27610 }
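
/* Illustrative effect of the rules above (a sketch; type_of_buf is a
   hypothetical stand-in for the tree of buf's array type, not a name
   used in this file):

     static char buf[16];   // >= 16 bytes: on TARGET_64BIT the ABI
                            // array rule yields 128-bit alignment
     static double d;       // DFmode: at least 64-bit alignment

   so ix86_data_alignment (type_of_buf, 8, true) would return 128 in a
   64-bit compilation; larger aggregates may be raised further by the
   GCC 4.8 compatibility and cache-line rules above.  */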
27611 
27612 /* Compute the alignment for a local variable or a stack slot.  EXP is
27613    the data type or decl itself, MODE is the widest mode available and
27614    ALIGN is the alignment that the object would ordinarily have.  The
27615    value of this function is used instead of that alignment to align the
27616    object.  */
27617 
27618 unsigned int
27619 ix86_local_alignment (tree exp, machine_mode mode,
27620 		      unsigned int align)
27621 {
27622   tree type, decl;
27623 
27624   if (exp && DECL_P (exp))
27625     {
27626       type = TREE_TYPE (exp);
27627       decl = exp;
27628     }
27629   else
27630     {
27631       type = exp;
27632       decl = NULL;
27633     }
27634 
27635   /* Don't do dynamic stack realignment for long long objects with
27636      -mpreferred-stack-boundary=2.  */
27637   if (!TARGET_64BIT
27638       && align == 64
27639       && ix86_preferred_stack_boundary < 64
27640       && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27641       && (!type || !TYPE_USER_ALIGN (type))
27642       && (!decl || !DECL_USER_ALIGN (decl)))
27643     align = 32;
27644 
27645   /* If TYPE is NULL, we are allocating a stack slot for a caller-save
27646      register in MODE.  Return the larger of the XFmode and DFmode
27647      alignments.  */
27648   if (!type)
27649     {
27650       if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27651 	align = GET_MODE_ALIGNMENT (DFmode);
27652       return align;
27653     }
27654 
27655   /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27656      to a 16-byte boundary.  The exact wording is:
27657 
27658      An array uses the same alignment as its elements, except that a local or
27659      global array variable of length at least 16 bytes or
27660      a C99 variable-length array variable always has alignment of at least 16 bytes.
27661 
27662      This was added to allow the use of aligned SSE instructions on arrays.
27663      The rule is meant for static storage (where the compiler cannot do the
27664      analysis by itself).  We follow it for automatic variables only when
27665      convenient: we fully control everything in the function being compiled,
27666      and functions from other units cannot rely on the alignment.
27667 
27668      Exclude the va_list type.  It is the common case of a local array where
27669      we cannot benefit from the alignment.
27670 
27671      TODO: Probably one should optimize for size only when the variable does not escape.  */
27672   if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27673       && TARGET_SSE)
27674     {
27675       if (AGGREGATE_TYPE_P (type)
27676 	  && (va_list_type_node == NULL_TREE
27677 	      || (TYPE_MAIN_VARIANT (type)
27678 		  != TYPE_MAIN_VARIANT (va_list_type_node)))
27679 	  && TYPE_SIZE (type)
27680 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27681 	  && wi::geu_p (TYPE_SIZE (type), 16)
27682 	  && align < 128)
27683 	return 128;
27684     }
27685   if (TREE_CODE (type) == ARRAY_TYPE)
27686     {
27687       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27688 	return 64;
27689       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27690 	return 128;
27691     }
27692   else if (TREE_CODE (type) == COMPLEX_TYPE)
27693     {
27694       if (TYPE_MODE (type) == DCmode && align < 64)
27695 	return 64;
27696       if ((TYPE_MODE (type) == XCmode
27697 	   || TYPE_MODE (type) == TCmode) && align < 128)
27698 	return 128;
27699     }
27700   else if ((TREE_CODE (type) == RECORD_TYPE
27701 	    || TREE_CODE (type) == UNION_TYPE
27702 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
27703 	   && TYPE_FIELDS (type))
27704     {
27705       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27706 	return 64;
27707       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27708 	return 128;
27709     }
27710   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27711 	   || TREE_CODE (type) == INTEGER_TYPE)
27712     {
27714       if (TYPE_MODE (type) == DFmode && align < 64)
27715 	return 64;
27716       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27717 	return 128;
27718     }
27719   return align;
27720 }
27721 
27722 /* Compute the minimum required alignment for dynamic stack realignment
27723    purposes for a local variable, parameter or a stack slot.  EXP is
27724    the data type or decl itself, MODE is its mode and ALIGN is the
27725    alignment that the object would ordinarily have.  */
27726 
27727 unsigned int
27728 ix86_minimum_alignment (tree exp, machine_mode mode,
27729 			unsigned int align)
27730 {
27731   tree type, decl;
27732 
27733   if (exp && DECL_P (exp))
27734     {
27735       type = TREE_TYPE (exp);
27736       decl = exp;
27737     }
27738   else
27739     {
27740       type = exp;
27741       decl = NULL;
27742     }
27743 
27744   if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27745     return align;
27746 
27747   /* Don't do dynamic stack realignment for long long objects with
27748      -mpreferred-stack-boundary=2.  */
27749   if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27750       && (!type || !TYPE_USER_ALIGN (type))
27751       && (!decl || !DECL_USER_ALIGN (decl)))
27752     return 32;
27753 
27754   return align;
27755 }
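
/* Example of the rule above (illustrative): with
   -m32 -mpreferred-stack-boundary=2, a plain "long long x;" local
   (DImode) is only required to be 32-bit aligned, so it alone does not
   trigger dynamic stack realignment, whereas
   "long long y __attribute__ ((aligned (8)));" keeps its user-requested
   64-bit minimum.  */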
27756 
27757 /* Find a location for the static chain incoming to a nested function.
27758    This is a register, unless all free registers are used by arguments.  */
27759 
27760 static rtx
27761 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27762 {
27763   unsigned regno;
27764 
27765   /* While this function won't be called by the middle-end when a static
27766      chain isn't needed, it's also used throughout the backend so it's
27767      easiest to keep this check centralized.  */
27768   if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27769     return NULL;
27770 
27771   if (TARGET_64BIT)
27772     {
27773       /* We always use R10 in 64-bit mode.  */
27774       regno = R10_REG;
27775     }
27776   else
27777     {
27778       const_tree fntype, fndecl;
27779       unsigned int ccvt;
27780 
27781       /* By default in 32-bit mode we use ECX to pass the static chain.  */
27782       regno = CX_REG;
27783 
27784       if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27785 	{
27786           fntype = TREE_TYPE (fndecl_or_type);
27787 	  fndecl = fndecl_or_type;
27788 	}
27789       else
27790 	{
27791 	  fntype = fndecl_or_type;
27792 	  fndecl = NULL;
27793 	}
27794 
27795       ccvt = ix86_get_callcvt (fntype);
27796       if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27797 	{
27798 	  /* Fastcall functions use ecx/edx for arguments, which leaves
27799 	     us with EAX for the static chain.  (Thiscall functions are
27800 	     handled separately below and likewise end up with EAX for
27801 	     the static chain.)  */
27802 	  regno = AX_REG;
27803 	}
27804       else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27805 	{
27806 	  /* Thiscall functions use ecx for arguments, which leaves
27807 	     us with EAX and EDX for the static chain.  For ABI
27808 	     compatibility we use EAX.  */
27809 	  regno = AX_REG;
27810 	}
27811       else if (ix86_function_regparm (fntype, fndecl) == 3)
27812 	{
27813 	  /* For regparm 3, we have no free call-clobbered registers in
27814 	     which to store the static chain.  In order to implement this,
27815 	     we have the trampoline push the static chain to the stack.
27816 	     However, we can't push a value below the return address when
27817 	     we call the nested function directly, so we have to use an
27818 	     alternate entry point.  For this we use ESI, and have the
27819 	     alternate entry point push ESI, so that things appear the
27820 	     same once we're executing the nested function.  */
27821 	  if (incoming_p)
27822 	    {
27823 	      if (fndecl == current_function_decl)
27824 		ix86_static_chain_on_stack = true;
27825 	      return gen_frame_mem (SImode,
27826 				    plus_constant (Pmode,
27827 						   arg_pointer_rtx, -8));
27828 	    }
27829 	  regno = SI_REG;
27830 	}
27831     }
27832 
27833   return gen_rtx_REG (Pmode, regno);
27834 }
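
/* Recap of the choices above (for reference, not an independent rule):
   64-bit code always uses %r10; 32-bit code uses %ecx by default,
   %eax for fastcall/thiscall, and for regparm(3) a stack slot together
   with %esi at the alternate entry point.  */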
27835 
27836 /* Emit RTL insns to initialize the variable parts of a trampoline.
27837    FNDECL is the decl of the target address; M_TRAMP is a MEM for
27838    the trampoline, and CHAIN_VALUE is an RTX for the static chain
27839    to be passed to the target function.  */
27840 
27841 static void
27842 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27843 {
27844   rtx mem, fnaddr;
27845   int opcode;
27846   int offset = 0;
27847 
27848   fnaddr = XEXP (DECL_RTL (fndecl), 0);
27849 
27850   if (TARGET_64BIT)
27851     {
27852       int size;
27853 
27854       /* Load the function address into r11.  Try to load the address
27855 	 using the shorter movl instead of movabs.  We may want to support
27856 	 movq for kernel mode, but the kernel does not use trampolines at
27857 	 the moment.  FNADDR is a 32-bit address and may not be in
27858 	 DImode when ptr_mode == SImode.  Always use movl in this
27859 	 case.  */
27860       if (ptr_mode == SImode
27861 	  || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27862 	{
27863 	  fnaddr = copy_addr_to_reg (fnaddr);
27864 
27865 	  mem = adjust_address (m_tramp, HImode, offset);
27866 	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27867 
27868 	  mem = adjust_address (m_tramp, SImode, offset + 2);
27869 	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27870 	  offset += 6;
27871 	}
27872       else
27873 	{
27874 	  mem = adjust_address (m_tramp, HImode, offset);
27875 	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27876 
27877 	  mem = adjust_address (m_tramp, DImode, offset + 2);
27878 	  emit_move_insn (mem, fnaddr);
27879 	  offset += 10;
27880 	}
27881 
27882       /* Load the static chain into r10 using movabs.  Use the shorter
27883          movl instead of movabs when ptr_mode == SImode.  */
27884       if (ptr_mode == SImode)
27885 	{
27886 	  opcode = 0xba41;
27887 	  size = 6;
27888 	}
27889       else
27890 	{
27891 	  opcode = 0xba49;
27892 	  size = 10;
27893 	}
27894 
27895       mem = adjust_address (m_tramp, HImode, offset);
27896       emit_move_insn (mem, gen_int_mode (opcode, HImode));
27897 
27898       mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27899       emit_move_insn (mem, chain_value);
27900       offset += size;
27901 
27902       /* Jump to r11; the last (unused) byte is a nop, only there to
27903 	 pad the write out to a single 32-bit store.  */
27904       mem = adjust_address (m_tramp, SImode, offset);
27905       emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27906       offset += 4;
27907     }
27908   else
27909     {
27910       rtx disp, chain;
27911 
27912       /* Depending on the static chain location, either load a register
27913 	 with a constant, or push the constant to the stack.  All of the
27914 	 instructions are the same size.  */
27915       chain = ix86_static_chain (fndecl, true);
27916       if (REG_P (chain))
27917 	{
27918 	  switch (REGNO (chain))
27919 	    {
27920 	    case AX_REG:
27921 	      opcode = 0xb8; break;
27922 	    case CX_REG:
27923 	      opcode = 0xb9; break;
27924 	    default:
27925 	      gcc_unreachable ();
27926 	    }
27927 	}
27928       else
27929 	opcode = 0x68;
27930 
27931       mem = adjust_address (m_tramp, QImode, offset);
27932       emit_move_insn (mem, gen_int_mode (opcode, QImode));
27933 
27934       mem = adjust_address (m_tramp, SImode, offset + 1);
27935       emit_move_insn (mem, chain_value);
27936       offset += 5;
27937 
27938       mem = adjust_address (m_tramp, QImode, offset);
27939       emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27940 
27941       mem = adjust_address (m_tramp, SImode, offset + 1);
27942 
27943       /* Compute the offset from the end of the jmp to the target function.
27944 	 When the trampoline stores the static chain on the stack, we must
27945 	 skip over the first insn, which pushes the (call-saved) static
27946 	 chain register; this push is 1 byte.  */
27947       offset += 5;
27948       disp = expand_binop (SImode, sub_optab, fnaddr,
27949 			   plus_constant (Pmode, XEXP (m_tramp, 0),
27950 					  offset - (MEM_P (chain) ? 1 : 0)),
27951 			   NULL_RTX, 1, OPTAB_DIRECT);
27952       emit_move_insn (mem, disp);
27953     }
27954 
27955   gcc_assert (offset <= TRAMPOLINE_SIZE);
27956 
27957 #ifdef HAVE_ENABLE_EXECUTE_STACK
27958 #ifdef CHECK_EXECUTE_STACK_ENABLED
27959   if (CHECK_EXECUTE_STACK_ENABLED)
27960 #endif
27961   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27962 		     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27963 #endif
27964 }
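
/* For reference, the 64-bit trampoline emitted above has this byte
   layout when ptr_mode == DImode (a sketch; the shorter movl forms are
   used when ptr_mode == SImode or FNADDR fits in 32 bits):

     49 bb <fnaddr, 8 bytes>   movabs $fnaddr, %r11
     49 ba <chain, 8 bytes>    movabs $chain,  %r10
     49 ff e3                  jmp    *%r11
     90                        nop (pads the final 32-bit store)

   The constants above (0xbb49, 0xba49, 0x90e3ff49) appear byte-swapped
   because they are stored little-endian.  The 32-bit trampoline is a
   single "mov $chain, %reg" or "push $chain" (opcodes 0xb8/0xb9/0x68)
   followed by "jmp rel32" (0xe9).  */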
27965 
27966 /* The following file contains several enumerations and data structures
27967    built from the definitions in i386-builtin-types.def.  */
27968 
27969 #include "i386-builtin-types.inc"
27970 
27971 /* Table for the ix86 builtin non-function types.  */
27972 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27973 
27974 /* Retrieve an element from the above table, building some of
27975    the types lazily.  */
27976 
27977 static tree
27978 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27979 {
27980   unsigned int index;
27981   tree type, itype;
27982 
27983   gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27984 
27985   type = ix86_builtin_type_tab[(int) tcode];
27986   if (type != NULL)
27987     return type;
27988 
27989   gcc_assert (tcode > IX86_BT_LAST_PRIM);
27990   if (tcode <= IX86_BT_LAST_VECT)
27991     {
27992       machine_mode mode;
27993 
27994       index = tcode - IX86_BT_LAST_PRIM - 1;
27995       itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27996       mode = ix86_builtin_type_vect_mode[index];
27997 
27998       type = build_vector_type_for_mode (itype, mode);
27999     }
28000   else
28001     {
28002       int quals;
28003 
28004       index = tcode - IX86_BT_LAST_VECT - 1;
28005       if (tcode <= IX86_BT_LAST_PTR)
28006 	quals = TYPE_UNQUALIFIED;
28007       else
28008 	quals = TYPE_QUAL_CONST;
28009 
28010       itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
28011       if (quals != TYPE_UNQUALIFIED)
28012 	itype = build_qualified_type (itype, quals);
28013 
28014       type = build_pointer_type (itype);
28015     }
28016 
28017   ix86_builtin_type_tab[(int) tcode] = type;
28018   return type;
28019 }
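
/* For instance (an illustration of the lazy construction above): the
   first request for a 4-float vector code recurses to obtain the float
   element type and then builds and memoizes

     type = build_vector_type_for_mode (float_type_node, V4SFmode);

   so later lookups are a single table read.  Codes past
   IX86_BT_LAST_VECT are pointer types built with build_pointer_type,
   const-qualified for codes past IX86_BT_LAST_PTR.  */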
28020 
28021 /* Table for the ix86 builtin function types.  */
28022 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
28023 
28024 /* Retrieve an element from the above table, building some of
28025    the types lazily.  */
28026 
28027 static tree
28028 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
28029 {
28030   tree type;
28031 
28032   gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
28033 
28034   type = ix86_builtin_func_type_tab[(int) tcode];
28035   if (type != NULL)
28036     return type;
28037 
28038   if (tcode <= IX86_BT_LAST_FUNC)
28039     {
28040       unsigned start = ix86_builtin_func_start[(int) tcode];
28041       unsigned after = ix86_builtin_func_start[(int) tcode + 1];
28042       tree rtype, atype, args = void_list_node;
28043       unsigned i;
28044 
28045       rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
28046       for (i = after - 1; i > start; --i)
28047 	{
28048 	  atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
28049 	  args = tree_cons (NULL, atype, args);
28050 	}
28051 
28052       type = build_function_type (rtype, args);
28053     }
28054   else
28055     {
28056       unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
28057       enum ix86_builtin_func_type icode;
28058 
28059       icode = ix86_builtin_func_alias_base[index];
28060       type = ix86_get_builtin_func_type (icode);
28061     }
28062 
28063   ix86_builtin_func_type_tab[(int) tcode] = type;
28064   return type;
28065 }
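
/* As an example (a sketch of the construction above): a two-argument
   function type such as "V4SF (V4SF, V4SF)" comes out roughly as

     tree v4sf = ix86_get_builtin_type (IX86_BT_V4SF);
     tree args = tree_cons (NULL, v4sf,
			    tree_cons (NULL, v4sf, void_list_node));
     tree type = build_function_type (v4sf, args);

   (IX86_BT_V4SF standing for whatever code i386-builtin-types.inc
   assigns).  The loop above walks the argument array backwards
   precisely because tree_cons prepends to the list.  */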
28066 
28067 
28068 /* Codes for all the SSE/MMX builtins.  */
28069 enum ix86_builtins
28070 {
28071   IX86_BUILTIN_ADDPS,
28072   IX86_BUILTIN_ADDSS,
28073   IX86_BUILTIN_DIVPS,
28074   IX86_BUILTIN_DIVSS,
28075   IX86_BUILTIN_MULPS,
28076   IX86_BUILTIN_MULSS,
28077   IX86_BUILTIN_SUBPS,
28078   IX86_BUILTIN_SUBSS,
28079 
28080   IX86_BUILTIN_CMPEQPS,
28081   IX86_BUILTIN_CMPLTPS,
28082   IX86_BUILTIN_CMPLEPS,
28083   IX86_BUILTIN_CMPGTPS,
28084   IX86_BUILTIN_CMPGEPS,
28085   IX86_BUILTIN_CMPNEQPS,
28086   IX86_BUILTIN_CMPNLTPS,
28087   IX86_BUILTIN_CMPNLEPS,
28088   IX86_BUILTIN_CMPNGTPS,
28089   IX86_BUILTIN_CMPNGEPS,
28090   IX86_BUILTIN_CMPORDPS,
28091   IX86_BUILTIN_CMPUNORDPS,
28092   IX86_BUILTIN_CMPEQSS,
28093   IX86_BUILTIN_CMPLTSS,
28094   IX86_BUILTIN_CMPLESS,
28095   IX86_BUILTIN_CMPNEQSS,
28096   IX86_BUILTIN_CMPNLTSS,
28097   IX86_BUILTIN_CMPNLESS,
28098   IX86_BUILTIN_CMPORDSS,
28099   IX86_BUILTIN_CMPUNORDSS,
28100 
28101   IX86_BUILTIN_COMIEQSS,
28102   IX86_BUILTIN_COMILTSS,
28103   IX86_BUILTIN_COMILESS,
28104   IX86_BUILTIN_COMIGTSS,
28105   IX86_BUILTIN_COMIGESS,
28106   IX86_BUILTIN_COMINEQSS,
28107   IX86_BUILTIN_UCOMIEQSS,
28108   IX86_BUILTIN_UCOMILTSS,
28109   IX86_BUILTIN_UCOMILESS,
28110   IX86_BUILTIN_UCOMIGTSS,
28111   IX86_BUILTIN_UCOMIGESS,
28112   IX86_BUILTIN_UCOMINEQSS,
28113 
28114   IX86_BUILTIN_CVTPI2PS,
28115   IX86_BUILTIN_CVTPS2PI,
28116   IX86_BUILTIN_CVTSI2SS,
28117   IX86_BUILTIN_CVTSI642SS,
28118   IX86_BUILTIN_CVTSS2SI,
28119   IX86_BUILTIN_CVTSS2SI64,
28120   IX86_BUILTIN_CVTTPS2PI,
28121   IX86_BUILTIN_CVTTSS2SI,
28122   IX86_BUILTIN_CVTTSS2SI64,
28123 
28124   IX86_BUILTIN_MAXPS,
28125   IX86_BUILTIN_MAXSS,
28126   IX86_BUILTIN_MINPS,
28127   IX86_BUILTIN_MINSS,
28128 
28129   IX86_BUILTIN_LOADUPS,
28130   IX86_BUILTIN_STOREUPS,
28131   IX86_BUILTIN_MOVSS,
28132 
28133   IX86_BUILTIN_MOVHLPS,
28134   IX86_BUILTIN_MOVLHPS,
28135   IX86_BUILTIN_LOADHPS,
28136   IX86_BUILTIN_LOADLPS,
28137   IX86_BUILTIN_STOREHPS,
28138   IX86_BUILTIN_STORELPS,
28139 
28140   IX86_BUILTIN_MASKMOVQ,
28141   IX86_BUILTIN_MOVMSKPS,
28142   IX86_BUILTIN_PMOVMSKB,
28143 
28144   IX86_BUILTIN_MOVNTPS,
28145   IX86_BUILTIN_MOVNTQ,
28146 
28147   IX86_BUILTIN_LOADDQU,
28148   IX86_BUILTIN_STOREDQU,
28149 
28150   IX86_BUILTIN_PACKSSWB,
28151   IX86_BUILTIN_PACKSSDW,
28152   IX86_BUILTIN_PACKUSWB,
28153 
28154   IX86_BUILTIN_PADDB,
28155   IX86_BUILTIN_PADDW,
28156   IX86_BUILTIN_PADDD,
28157   IX86_BUILTIN_PADDQ,
28158   IX86_BUILTIN_PADDSB,
28159   IX86_BUILTIN_PADDSW,
28160   IX86_BUILTIN_PADDUSB,
28161   IX86_BUILTIN_PADDUSW,
28162   IX86_BUILTIN_PSUBB,
28163   IX86_BUILTIN_PSUBW,
28164   IX86_BUILTIN_PSUBD,
28165   IX86_BUILTIN_PSUBQ,
28166   IX86_BUILTIN_PSUBSB,
28167   IX86_BUILTIN_PSUBSW,
28168   IX86_BUILTIN_PSUBUSB,
28169   IX86_BUILTIN_PSUBUSW,
28170 
28171   IX86_BUILTIN_PAND,
28172   IX86_BUILTIN_PANDN,
28173   IX86_BUILTIN_POR,
28174   IX86_BUILTIN_PXOR,
28175 
28176   IX86_BUILTIN_PAVGB,
28177   IX86_BUILTIN_PAVGW,
28178 
28179   IX86_BUILTIN_PCMPEQB,
28180   IX86_BUILTIN_PCMPEQW,
28181   IX86_BUILTIN_PCMPEQD,
28182   IX86_BUILTIN_PCMPGTB,
28183   IX86_BUILTIN_PCMPGTW,
28184   IX86_BUILTIN_PCMPGTD,
28185 
28186   IX86_BUILTIN_PMADDWD,
28187 
28188   IX86_BUILTIN_PMAXSW,
28189   IX86_BUILTIN_PMAXUB,
28190   IX86_BUILTIN_PMINSW,
28191   IX86_BUILTIN_PMINUB,
28192 
28193   IX86_BUILTIN_PMULHUW,
28194   IX86_BUILTIN_PMULHW,
28195   IX86_BUILTIN_PMULLW,
28196 
28197   IX86_BUILTIN_PSADBW,
28198   IX86_BUILTIN_PSHUFW,
28199 
28200   IX86_BUILTIN_PSLLW,
28201   IX86_BUILTIN_PSLLD,
28202   IX86_BUILTIN_PSLLQ,
28203   IX86_BUILTIN_PSRAW,
28204   IX86_BUILTIN_PSRAD,
28205   IX86_BUILTIN_PSRLW,
28206   IX86_BUILTIN_PSRLD,
28207   IX86_BUILTIN_PSRLQ,
28208   IX86_BUILTIN_PSLLWI,
28209   IX86_BUILTIN_PSLLDI,
28210   IX86_BUILTIN_PSLLQI,
28211   IX86_BUILTIN_PSRAWI,
28212   IX86_BUILTIN_PSRADI,
28213   IX86_BUILTIN_PSRLWI,
28214   IX86_BUILTIN_PSRLDI,
28215   IX86_BUILTIN_PSRLQI,
28216 
28217   IX86_BUILTIN_PUNPCKHBW,
28218   IX86_BUILTIN_PUNPCKHWD,
28219   IX86_BUILTIN_PUNPCKHDQ,
28220   IX86_BUILTIN_PUNPCKLBW,
28221   IX86_BUILTIN_PUNPCKLWD,
28222   IX86_BUILTIN_PUNPCKLDQ,
28223 
28224   IX86_BUILTIN_SHUFPS,
28225 
28226   IX86_BUILTIN_RCPPS,
28227   IX86_BUILTIN_RCPSS,
28228   IX86_BUILTIN_RSQRTPS,
28229   IX86_BUILTIN_RSQRTPS_NR,
28230   IX86_BUILTIN_RSQRTSS,
28231   IX86_BUILTIN_RSQRTF,
28232   IX86_BUILTIN_SQRTPS,
28233   IX86_BUILTIN_SQRTPS_NR,
28234   IX86_BUILTIN_SQRTSS,
28235 
28236   IX86_BUILTIN_UNPCKHPS,
28237   IX86_BUILTIN_UNPCKLPS,
28238 
28239   IX86_BUILTIN_ANDPS,
28240   IX86_BUILTIN_ANDNPS,
28241   IX86_BUILTIN_ORPS,
28242   IX86_BUILTIN_XORPS,
28243 
28244   IX86_BUILTIN_EMMS,
28245   IX86_BUILTIN_LDMXCSR,
28246   IX86_BUILTIN_STMXCSR,
28247   IX86_BUILTIN_SFENCE,
28248 
28249   IX86_BUILTIN_FXSAVE,
28250   IX86_BUILTIN_FXRSTOR,
28251   IX86_BUILTIN_FXSAVE64,
28252   IX86_BUILTIN_FXRSTOR64,
28253 
28254   IX86_BUILTIN_XSAVE,
28255   IX86_BUILTIN_XRSTOR,
28256   IX86_BUILTIN_XSAVE64,
28257   IX86_BUILTIN_XRSTOR64,
28258 
28259   IX86_BUILTIN_XSAVEOPT,
28260   IX86_BUILTIN_XSAVEOPT64,
28261 
28262   IX86_BUILTIN_XSAVEC,
28263   IX86_BUILTIN_XSAVEC64,
28264 
28265   IX86_BUILTIN_XSAVES,
28266   IX86_BUILTIN_XRSTORS,
28267   IX86_BUILTIN_XSAVES64,
28268   IX86_BUILTIN_XRSTORS64,
28269 
28270   /* 3DNow! Original */
28271   IX86_BUILTIN_FEMMS,
28272   IX86_BUILTIN_PAVGUSB,
28273   IX86_BUILTIN_PF2ID,
28274   IX86_BUILTIN_PFACC,
28275   IX86_BUILTIN_PFADD,
28276   IX86_BUILTIN_PFCMPEQ,
28277   IX86_BUILTIN_PFCMPGE,
28278   IX86_BUILTIN_PFCMPGT,
28279   IX86_BUILTIN_PFMAX,
28280   IX86_BUILTIN_PFMIN,
28281   IX86_BUILTIN_PFMUL,
28282   IX86_BUILTIN_PFRCP,
28283   IX86_BUILTIN_PFRCPIT1,
28284   IX86_BUILTIN_PFRCPIT2,
28285   IX86_BUILTIN_PFRSQIT1,
28286   IX86_BUILTIN_PFRSQRT,
28287   IX86_BUILTIN_PFSUB,
28288   IX86_BUILTIN_PFSUBR,
28289   IX86_BUILTIN_PI2FD,
28290   IX86_BUILTIN_PMULHRW,
28291 
28292   /* 3DNow! Athlon Extensions */
28293   IX86_BUILTIN_PF2IW,
28294   IX86_BUILTIN_PFNACC,
28295   IX86_BUILTIN_PFPNACC,
28296   IX86_BUILTIN_PI2FW,
28297   IX86_BUILTIN_PSWAPDSI,
28298   IX86_BUILTIN_PSWAPDSF,
28299 
28300   /* SSE2 */
28301   IX86_BUILTIN_ADDPD,
28302   IX86_BUILTIN_ADDSD,
28303   IX86_BUILTIN_DIVPD,
28304   IX86_BUILTIN_DIVSD,
28305   IX86_BUILTIN_MULPD,
28306   IX86_BUILTIN_MULSD,
28307   IX86_BUILTIN_SUBPD,
28308   IX86_BUILTIN_SUBSD,
28309 
28310   IX86_BUILTIN_CMPEQPD,
28311   IX86_BUILTIN_CMPLTPD,
28312   IX86_BUILTIN_CMPLEPD,
28313   IX86_BUILTIN_CMPGTPD,
28314   IX86_BUILTIN_CMPGEPD,
28315   IX86_BUILTIN_CMPNEQPD,
28316   IX86_BUILTIN_CMPNLTPD,
28317   IX86_BUILTIN_CMPNLEPD,
28318   IX86_BUILTIN_CMPNGTPD,
28319   IX86_BUILTIN_CMPNGEPD,
28320   IX86_BUILTIN_CMPORDPD,
28321   IX86_BUILTIN_CMPUNORDPD,
28322   IX86_BUILTIN_CMPEQSD,
28323   IX86_BUILTIN_CMPLTSD,
28324   IX86_BUILTIN_CMPLESD,
28325   IX86_BUILTIN_CMPNEQSD,
28326   IX86_BUILTIN_CMPNLTSD,
28327   IX86_BUILTIN_CMPNLESD,
28328   IX86_BUILTIN_CMPORDSD,
28329   IX86_BUILTIN_CMPUNORDSD,
28330 
28331   IX86_BUILTIN_COMIEQSD,
28332   IX86_BUILTIN_COMILTSD,
28333   IX86_BUILTIN_COMILESD,
28334   IX86_BUILTIN_COMIGTSD,
28335   IX86_BUILTIN_COMIGESD,
28336   IX86_BUILTIN_COMINEQSD,
28337   IX86_BUILTIN_UCOMIEQSD,
28338   IX86_BUILTIN_UCOMILTSD,
28339   IX86_BUILTIN_UCOMILESD,
28340   IX86_BUILTIN_UCOMIGTSD,
28341   IX86_BUILTIN_UCOMIGESD,
28342   IX86_BUILTIN_UCOMINEQSD,
28343 
28344   IX86_BUILTIN_MAXPD,
28345   IX86_BUILTIN_MAXSD,
28346   IX86_BUILTIN_MINPD,
28347   IX86_BUILTIN_MINSD,
28348 
28349   IX86_BUILTIN_ANDPD,
28350   IX86_BUILTIN_ANDNPD,
28351   IX86_BUILTIN_ORPD,
28352   IX86_BUILTIN_XORPD,
28353 
28354   IX86_BUILTIN_SQRTPD,
28355   IX86_BUILTIN_SQRTSD,
28356 
28357   IX86_BUILTIN_UNPCKHPD,
28358   IX86_BUILTIN_UNPCKLPD,
28359 
28360   IX86_BUILTIN_SHUFPD,
28361 
28362   IX86_BUILTIN_LOADUPD,
28363   IX86_BUILTIN_STOREUPD,
28364   IX86_BUILTIN_MOVSD,
28365 
28366   IX86_BUILTIN_LOADHPD,
28367   IX86_BUILTIN_LOADLPD,
28368 
28369   IX86_BUILTIN_CVTDQ2PD,
28370   IX86_BUILTIN_CVTDQ2PS,
28371 
28372   IX86_BUILTIN_CVTPD2DQ,
28373   IX86_BUILTIN_CVTPD2PI,
28374   IX86_BUILTIN_CVTPD2PS,
28375   IX86_BUILTIN_CVTTPD2DQ,
28376   IX86_BUILTIN_CVTTPD2PI,
28377 
28378   IX86_BUILTIN_CVTPI2PD,
28379   IX86_BUILTIN_CVTSI2SD,
28380   IX86_BUILTIN_CVTSI642SD,
28381 
28382   IX86_BUILTIN_CVTSD2SI,
28383   IX86_BUILTIN_CVTSD2SI64,
28384   IX86_BUILTIN_CVTSD2SS,
28385   IX86_BUILTIN_CVTSS2SD,
28386   IX86_BUILTIN_CVTTSD2SI,
28387   IX86_BUILTIN_CVTTSD2SI64,
28388 
28389   IX86_BUILTIN_CVTPS2DQ,
28390   IX86_BUILTIN_CVTPS2PD,
28391   IX86_BUILTIN_CVTTPS2DQ,
28392 
28393   IX86_BUILTIN_MOVNTI,
28394   IX86_BUILTIN_MOVNTI64,
28395   IX86_BUILTIN_MOVNTPD,
28396   IX86_BUILTIN_MOVNTDQ,
28397 
28398   IX86_BUILTIN_MOVQ128,
28399 
28400   /* SSE2 MMX */
28401   IX86_BUILTIN_MASKMOVDQU,
28402   IX86_BUILTIN_MOVMSKPD,
28403   IX86_BUILTIN_PMOVMSKB128,
28404 
28405   IX86_BUILTIN_PACKSSWB128,
28406   IX86_BUILTIN_PACKSSDW128,
28407   IX86_BUILTIN_PACKUSWB128,
28408 
28409   IX86_BUILTIN_PADDB128,
28410   IX86_BUILTIN_PADDW128,
28411   IX86_BUILTIN_PADDD128,
28412   IX86_BUILTIN_PADDQ128,
28413   IX86_BUILTIN_PADDSB128,
28414   IX86_BUILTIN_PADDSW128,
28415   IX86_BUILTIN_PADDUSB128,
28416   IX86_BUILTIN_PADDUSW128,
28417   IX86_BUILTIN_PSUBB128,
28418   IX86_BUILTIN_PSUBW128,
28419   IX86_BUILTIN_PSUBD128,
28420   IX86_BUILTIN_PSUBQ128,
28421   IX86_BUILTIN_PSUBSB128,
28422   IX86_BUILTIN_PSUBSW128,
28423   IX86_BUILTIN_PSUBUSB128,
28424   IX86_BUILTIN_PSUBUSW128,
28425 
28426   IX86_BUILTIN_PAND128,
28427   IX86_BUILTIN_PANDN128,
28428   IX86_BUILTIN_POR128,
28429   IX86_BUILTIN_PXOR128,
28430 
28431   IX86_BUILTIN_PAVGB128,
28432   IX86_BUILTIN_PAVGW128,
28433 
28434   IX86_BUILTIN_PCMPEQB128,
28435   IX86_BUILTIN_PCMPEQW128,
28436   IX86_BUILTIN_PCMPEQD128,
28437   IX86_BUILTIN_PCMPGTB128,
28438   IX86_BUILTIN_PCMPGTW128,
28439   IX86_BUILTIN_PCMPGTD128,
28440 
28441   IX86_BUILTIN_PMADDWD128,
28442 
28443   IX86_BUILTIN_PMAXSW128,
28444   IX86_BUILTIN_PMAXUB128,
28445   IX86_BUILTIN_PMINSW128,
28446   IX86_BUILTIN_PMINUB128,
28447 
28448   IX86_BUILTIN_PMULUDQ,
28449   IX86_BUILTIN_PMULUDQ128,
28450   IX86_BUILTIN_PMULHUW128,
28451   IX86_BUILTIN_PMULHW128,
28452   IX86_BUILTIN_PMULLW128,
28453 
28454   IX86_BUILTIN_PSADBW128,
28455   IX86_BUILTIN_PSHUFHW,
28456   IX86_BUILTIN_PSHUFLW,
28457   IX86_BUILTIN_PSHUFD,
28458 
28459   IX86_BUILTIN_PSLLDQI128,
28460   IX86_BUILTIN_PSLLWI128,
28461   IX86_BUILTIN_PSLLDI128,
28462   IX86_BUILTIN_PSLLQI128,
28463   IX86_BUILTIN_PSRAWI128,
28464   IX86_BUILTIN_PSRADI128,
28465   IX86_BUILTIN_PSRLDQI128,
28466   IX86_BUILTIN_PSRLWI128,
28467   IX86_BUILTIN_PSRLDI128,
28468   IX86_BUILTIN_PSRLQI128,
28469 
28470   IX86_BUILTIN_PSLLDQ128,
28471   IX86_BUILTIN_PSLLW128,
28472   IX86_BUILTIN_PSLLD128,
28473   IX86_BUILTIN_PSLLQ128,
28474   IX86_BUILTIN_PSRAW128,
28475   IX86_BUILTIN_PSRAD128,
28476   IX86_BUILTIN_PSRLW128,
28477   IX86_BUILTIN_PSRLD128,
28478   IX86_BUILTIN_PSRLQ128,
28479 
28480   IX86_BUILTIN_PUNPCKHBW128,
28481   IX86_BUILTIN_PUNPCKHWD128,
28482   IX86_BUILTIN_PUNPCKHDQ128,
28483   IX86_BUILTIN_PUNPCKHQDQ128,
28484   IX86_BUILTIN_PUNPCKLBW128,
28485   IX86_BUILTIN_PUNPCKLWD128,
28486   IX86_BUILTIN_PUNPCKLDQ128,
28487   IX86_BUILTIN_PUNPCKLQDQ128,
28488 
28489   IX86_BUILTIN_CLFLUSH,
28490   IX86_BUILTIN_MFENCE,
28491   IX86_BUILTIN_LFENCE,
28492   IX86_BUILTIN_PAUSE,
28493 
28494   IX86_BUILTIN_FNSTENV,
28495   IX86_BUILTIN_FLDENV,
28496   IX86_BUILTIN_FNSTSW,
28497   IX86_BUILTIN_FNCLEX,
28498 
28499   IX86_BUILTIN_BSRSI,
28500   IX86_BUILTIN_BSRDI,
28501   IX86_BUILTIN_RDPMC,
28502   IX86_BUILTIN_RDTSC,
28503   IX86_BUILTIN_RDTSCP,
28504   IX86_BUILTIN_ROLQI,
28505   IX86_BUILTIN_ROLHI,
28506   IX86_BUILTIN_RORQI,
28507   IX86_BUILTIN_RORHI,
28508 
28509   /* SSE3.  */
28510   IX86_BUILTIN_ADDSUBPS,
28511   IX86_BUILTIN_HADDPS,
28512   IX86_BUILTIN_HSUBPS,
28513   IX86_BUILTIN_MOVSHDUP,
28514   IX86_BUILTIN_MOVSLDUP,
28515   IX86_BUILTIN_ADDSUBPD,
28516   IX86_BUILTIN_HADDPD,
28517   IX86_BUILTIN_HSUBPD,
28518   IX86_BUILTIN_LDDQU,
28519 
28520   IX86_BUILTIN_MONITOR,
28521   IX86_BUILTIN_MWAIT,
28522 
28523   /* SSSE3.  */
28524   IX86_BUILTIN_PHADDW,
28525   IX86_BUILTIN_PHADDD,
28526   IX86_BUILTIN_PHADDSW,
28527   IX86_BUILTIN_PHSUBW,
28528   IX86_BUILTIN_PHSUBD,
28529   IX86_BUILTIN_PHSUBSW,
28530   IX86_BUILTIN_PMADDUBSW,
28531   IX86_BUILTIN_PMULHRSW,
28532   IX86_BUILTIN_PSHUFB,
28533   IX86_BUILTIN_PSIGNB,
28534   IX86_BUILTIN_PSIGNW,
28535   IX86_BUILTIN_PSIGND,
28536   IX86_BUILTIN_PALIGNR,
28537   IX86_BUILTIN_PABSB,
28538   IX86_BUILTIN_PABSW,
28539   IX86_BUILTIN_PABSD,
28540 
28541   IX86_BUILTIN_PHADDW128,
28542   IX86_BUILTIN_PHADDD128,
28543   IX86_BUILTIN_PHADDSW128,
28544   IX86_BUILTIN_PHSUBW128,
28545   IX86_BUILTIN_PHSUBD128,
28546   IX86_BUILTIN_PHSUBSW128,
28547   IX86_BUILTIN_PMADDUBSW128,
28548   IX86_BUILTIN_PMULHRSW128,
28549   IX86_BUILTIN_PSHUFB128,
28550   IX86_BUILTIN_PSIGNB128,
28551   IX86_BUILTIN_PSIGNW128,
28552   IX86_BUILTIN_PSIGND128,
28553   IX86_BUILTIN_PALIGNR128,
28554   IX86_BUILTIN_PABSB128,
28555   IX86_BUILTIN_PABSW128,
28556   IX86_BUILTIN_PABSD128,
28557 
28558   /* AMDFAM10 - SSE4A New Instructions.  */
28559   IX86_BUILTIN_MOVNTSD,
28560   IX86_BUILTIN_MOVNTSS,
28561   IX86_BUILTIN_EXTRQI,
28562   IX86_BUILTIN_EXTRQ,
28563   IX86_BUILTIN_INSERTQI,
28564   IX86_BUILTIN_INSERTQ,
28565 
28566   /* SSE4.1.  */
28567   IX86_BUILTIN_BLENDPD,
28568   IX86_BUILTIN_BLENDPS,
28569   IX86_BUILTIN_BLENDVPD,
28570   IX86_BUILTIN_BLENDVPS,
28571   IX86_BUILTIN_PBLENDVB128,
28572   IX86_BUILTIN_PBLENDW128,
28573 
28574   IX86_BUILTIN_DPPD,
28575   IX86_BUILTIN_DPPS,
28576 
28577   IX86_BUILTIN_INSERTPS128,
28578 
28579   IX86_BUILTIN_MOVNTDQA,
28580   IX86_BUILTIN_MPSADBW128,
28581   IX86_BUILTIN_PACKUSDW128,
28582   IX86_BUILTIN_PCMPEQQ,
28583   IX86_BUILTIN_PHMINPOSUW128,
28584 
28585   IX86_BUILTIN_PMAXSB128,
28586   IX86_BUILTIN_PMAXSD128,
28587   IX86_BUILTIN_PMAXUD128,
28588   IX86_BUILTIN_PMAXUW128,
28589 
28590   IX86_BUILTIN_PMINSB128,
28591   IX86_BUILTIN_PMINSD128,
28592   IX86_BUILTIN_PMINUD128,
28593   IX86_BUILTIN_PMINUW128,
28594 
28595   IX86_BUILTIN_PMOVSXBW128,
28596   IX86_BUILTIN_PMOVSXBD128,
28597   IX86_BUILTIN_PMOVSXBQ128,
28598   IX86_BUILTIN_PMOVSXWD128,
28599   IX86_BUILTIN_PMOVSXWQ128,
28600   IX86_BUILTIN_PMOVSXDQ128,
28601 
28602   IX86_BUILTIN_PMOVZXBW128,
28603   IX86_BUILTIN_PMOVZXBD128,
28604   IX86_BUILTIN_PMOVZXBQ128,
28605   IX86_BUILTIN_PMOVZXWD128,
28606   IX86_BUILTIN_PMOVZXWQ128,
28607   IX86_BUILTIN_PMOVZXDQ128,
28608 
28609   IX86_BUILTIN_PMULDQ128,
28610   IX86_BUILTIN_PMULLD128,
28611 
28612   IX86_BUILTIN_ROUNDSD,
28613   IX86_BUILTIN_ROUNDSS,
28614 
28615   IX86_BUILTIN_ROUNDPD,
28616   IX86_BUILTIN_ROUNDPS,
28617 
28618   IX86_BUILTIN_FLOORPD,
28619   IX86_BUILTIN_CEILPD,
28620   IX86_BUILTIN_TRUNCPD,
28621   IX86_BUILTIN_RINTPD,
28622   IX86_BUILTIN_ROUNDPD_AZ,
28623 
28624   IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28625   IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28626   IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28627 
28628   IX86_BUILTIN_FLOORPS,
28629   IX86_BUILTIN_CEILPS,
28630   IX86_BUILTIN_TRUNCPS,
28631   IX86_BUILTIN_RINTPS,
28632   IX86_BUILTIN_ROUNDPS_AZ,
28633 
28634   IX86_BUILTIN_FLOORPS_SFIX,
28635   IX86_BUILTIN_CEILPS_SFIX,
28636   IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28637 
28638   IX86_BUILTIN_PTESTZ,
28639   IX86_BUILTIN_PTESTC,
28640   IX86_BUILTIN_PTESTNZC,
28641 
28642   IX86_BUILTIN_VEC_INIT_V2SI,
28643   IX86_BUILTIN_VEC_INIT_V4HI,
28644   IX86_BUILTIN_VEC_INIT_V8QI,
28645   IX86_BUILTIN_VEC_EXT_V2DF,
28646   IX86_BUILTIN_VEC_EXT_V2DI,
28647   IX86_BUILTIN_VEC_EXT_V4SF,
28648   IX86_BUILTIN_VEC_EXT_V4SI,
28649   IX86_BUILTIN_VEC_EXT_V8HI,
28650   IX86_BUILTIN_VEC_EXT_V2SI,
28651   IX86_BUILTIN_VEC_EXT_V4HI,
28652   IX86_BUILTIN_VEC_EXT_V16QI,
28653   IX86_BUILTIN_VEC_SET_V2DI,
28654   IX86_BUILTIN_VEC_SET_V4SF,
28655   IX86_BUILTIN_VEC_SET_V4SI,
28656   IX86_BUILTIN_VEC_SET_V8HI,
28657   IX86_BUILTIN_VEC_SET_V4HI,
28658   IX86_BUILTIN_VEC_SET_V16QI,
28659 
28660   IX86_BUILTIN_VEC_PACK_SFIX,
28661   IX86_BUILTIN_VEC_PACK_SFIX256,
28662 
28663   /* SSE4.2.  */
28664   IX86_BUILTIN_CRC32QI,
28665   IX86_BUILTIN_CRC32HI,
28666   IX86_BUILTIN_CRC32SI,
28667   IX86_BUILTIN_CRC32DI,
28668 
28669   IX86_BUILTIN_PCMPESTRI128,
28670   IX86_BUILTIN_PCMPESTRM128,
28671   IX86_BUILTIN_PCMPESTRA128,
28672   IX86_BUILTIN_PCMPESTRC128,
28673   IX86_BUILTIN_PCMPESTRO128,
28674   IX86_BUILTIN_PCMPESTRS128,
28675   IX86_BUILTIN_PCMPESTRZ128,
28676   IX86_BUILTIN_PCMPISTRI128,
28677   IX86_BUILTIN_PCMPISTRM128,
28678   IX86_BUILTIN_PCMPISTRA128,
28679   IX86_BUILTIN_PCMPISTRC128,
28680   IX86_BUILTIN_PCMPISTRO128,
28681   IX86_BUILTIN_PCMPISTRS128,
28682   IX86_BUILTIN_PCMPISTRZ128,
28683 
28684   IX86_BUILTIN_PCMPGTQ,
28685 
28686   /* AES instructions */
28687   IX86_BUILTIN_AESENC128,
28688   IX86_BUILTIN_AESENCLAST128,
28689   IX86_BUILTIN_AESDEC128,
28690   IX86_BUILTIN_AESDECLAST128,
28691   IX86_BUILTIN_AESIMC128,
28692   IX86_BUILTIN_AESKEYGENASSIST128,
28693 
28694   /* PCLMUL instruction */
28695   IX86_BUILTIN_PCLMULQDQ128,
28696 
28697   /* AVX */
28698   IX86_BUILTIN_ADDPD256,
28699   IX86_BUILTIN_ADDPS256,
28700   IX86_BUILTIN_ADDSUBPD256,
28701   IX86_BUILTIN_ADDSUBPS256,
28702   IX86_BUILTIN_ANDPD256,
28703   IX86_BUILTIN_ANDPS256,
28704   IX86_BUILTIN_ANDNPD256,
28705   IX86_BUILTIN_ANDNPS256,
28706   IX86_BUILTIN_BLENDPD256,
28707   IX86_BUILTIN_BLENDPS256,
28708   IX86_BUILTIN_BLENDVPD256,
28709   IX86_BUILTIN_BLENDVPS256,
28710   IX86_BUILTIN_DIVPD256,
28711   IX86_BUILTIN_DIVPS256,
28712   IX86_BUILTIN_DPPS256,
28713   IX86_BUILTIN_HADDPD256,
28714   IX86_BUILTIN_HADDPS256,
28715   IX86_BUILTIN_HSUBPD256,
28716   IX86_BUILTIN_HSUBPS256,
28717   IX86_BUILTIN_MAXPD256,
28718   IX86_BUILTIN_MAXPS256,
28719   IX86_BUILTIN_MINPD256,
28720   IX86_BUILTIN_MINPS256,
28721   IX86_BUILTIN_MULPD256,
28722   IX86_BUILTIN_MULPS256,
28723   IX86_BUILTIN_ORPD256,
28724   IX86_BUILTIN_ORPS256,
28725   IX86_BUILTIN_SHUFPD256,
28726   IX86_BUILTIN_SHUFPS256,
28727   IX86_BUILTIN_SUBPD256,
28728   IX86_BUILTIN_SUBPS256,
28729   IX86_BUILTIN_XORPD256,
28730   IX86_BUILTIN_XORPS256,
28731   IX86_BUILTIN_CMPSD,
28732   IX86_BUILTIN_CMPSS,
28733   IX86_BUILTIN_CMPPD,
28734   IX86_BUILTIN_CMPPS,
28735   IX86_BUILTIN_CMPPD256,
28736   IX86_BUILTIN_CMPPS256,
28737   IX86_BUILTIN_CVTDQ2PD256,
28738   IX86_BUILTIN_CVTDQ2PS256,
28739   IX86_BUILTIN_CVTPD2PS256,
28740   IX86_BUILTIN_CVTPS2DQ256,
28741   IX86_BUILTIN_CVTPS2PD256,
28742   IX86_BUILTIN_CVTTPD2DQ256,
28743   IX86_BUILTIN_CVTPD2DQ256,
28744   IX86_BUILTIN_CVTTPS2DQ256,
28745   IX86_BUILTIN_EXTRACTF128PD256,
28746   IX86_BUILTIN_EXTRACTF128PS256,
28747   IX86_BUILTIN_EXTRACTF128SI256,
28748   IX86_BUILTIN_VZEROALL,
28749   IX86_BUILTIN_VZEROUPPER,
28750   IX86_BUILTIN_VPERMILVARPD,
28751   IX86_BUILTIN_VPERMILVARPS,
28752   IX86_BUILTIN_VPERMILVARPD256,
28753   IX86_BUILTIN_VPERMILVARPS256,
28754   IX86_BUILTIN_VPERMILPD,
28755   IX86_BUILTIN_VPERMILPS,
28756   IX86_BUILTIN_VPERMILPD256,
28757   IX86_BUILTIN_VPERMILPS256,
28758   IX86_BUILTIN_VPERMIL2PD,
28759   IX86_BUILTIN_VPERMIL2PS,
28760   IX86_BUILTIN_VPERMIL2PD256,
28761   IX86_BUILTIN_VPERMIL2PS256,
28762   IX86_BUILTIN_VPERM2F128PD256,
28763   IX86_BUILTIN_VPERM2F128PS256,
28764   IX86_BUILTIN_VPERM2F128SI256,
28765   IX86_BUILTIN_VBROADCASTSS,
28766   IX86_BUILTIN_VBROADCASTSD256,
28767   IX86_BUILTIN_VBROADCASTSS256,
28768   IX86_BUILTIN_VBROADCASTPD256,
28769   IX86_BUILTIN_VBROADCASTPS256,
28770   IX86_BUILTIN_VINSERTF128PD256,
28771   IX86_BUILTIN_VINSERTF128PS256,
28772   IX86_BUILTIN_VINSERTF128SI256,
28773   IX86_BUILTIN_LOADUPD256,
28774   IX86_BUILTIN_LOADUPS256,
28775   IX86_BUILTIN_STOREUPD256,
28776   IX86_BUILTIN_STOREUPS256,
28777   IX86_BUILTIN_LDDQU256,
28778   IX86_BUILTIN_MOVNTDQ256,
28779   IX86_BUILTIN_MOVNTPD256,
28780   IX86_BUILTIN_MOVNTPS256,
28781   IX86_BUILTIN_LOADDQU256,
28782   IX86_BUILTIN_STOREDQU256,
28783   IX86_BUILTIN_MASKLOADPD,
28784   IX86_BUILTIN_MASKLOADPS,
28785   IX86_BUILTIN_MASKSTOREPD,
28786   IX86_BUILTIN_MASKSTOREPS,
28787   IX86_BUILTIN_MASKLOADPD256,
28788   IX86_BUILTIN_MASKLOADPS256,
28789   IX86_BUILTIN_MASKSTOREPD256,
28790   IX86_BUILTIN_MASKSTOREPS256,
28791   IX86_BUILTIN_MOVSHDUP256,
28792   IX86_BUILTIN_MOVSLDUP256,
28793   IX86_BUILTIN_MOVDDUP256,
28794 
28795   IX86_BUILTIN_SQRTPD256,
28796   IX86_BUILTIN_SQRTPS256,
28797   IX86_BUILTIN_SQRTPS_NR256,
28798   IX86_BUILTIN_RSQRTPS256,
28799   IX86_BUILTIN_RSQRTPS_NR256,
28800 
28801   IX86_BUILTIN_RCPPS256,
28802 
28803   IX86_BUILTIN_ROUNDPD256,
28804   IX86_BUILTIN_ROUNDPS256,
28805 
28806   IX86_BUILTIN_FLOORPD256,
28807   IX86_BUILTIN_CEILPD256,
28808   IX86_BUILTIN_TRUNCPD256,
28809   IX86_BUILTIN_RINTPD256,
28810   IX86_BUILTIN_ROUNDPD_AZ256,
28811 
28812   IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28813   IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28814   IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28815 
28816   IX86_BUILTIN_FLOORPS256,
28817   IX86_BUILTIN_CEILPS256,
28818   IX86_BUILTIN_TRUNCPS256,
28819   IX86_BUILTIN_RINTPS256,
28820   IX86_BUILTIN_ROUNDPS_AZ256,
28821 
28822   IX86_BUILTIN_FLOORPS_SFIX256,
28823   IX86_BUILTIN_CEILPS_SFIX256,
28824   IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28825 
28826   IX86_BUILTIN_UNPCKHPD256,
28827   IX86_BUILTIN_UNPCKLPD256,
28828   IX86_BUILTIN_UNPCKHPS256,
28829   IX86_BUILTIN_UNPCKLPS256,
28830 
28831   IX86_BUILTIN_SI256_SI,
28832   IX86_BUILTIN_PS256_PS,
28833   IX86_BUILTIN_PD256_PD,
28834   IX86_BUILTIN_SI_SI256,
28835   IX86_BUILTIN_PS_PS256,
28836   IX86_BUILTIN_PD_PD256,
28837 
28838   IX86_BUILTIN_VTESTZPD,
28839   IX86_BUILTIN_VTESTCPD,
28840   IX86_BUILTIN_VTESTNZCPD,
28841   IX86_BUILTIN_VTESTZPS,
28842   IX86_BUILTIN_VTESTCPS,
28843   IX86_BUILTIN_VTESTNZCPS,
28844   IX86_BUILTIN_VTESTZPD256,
28845   IX86_BUILTIN_VTESTCPD256,
28846   IX86_BUILTIN_VTESTNZCPD256,
28847   IX86_BUILTIN_VTESTZPS256,
28848   IX86_BUILTIN_VTESTCPS256,
28849   IX86_BUILTIN_VTESTNZCPS256,
28850   IX86_BUILTIN_PTESTZ256,
28851   IX86_BUILTIN_PTESTC256,
28852   IX86_BUILTIN_PTESTNZC256,
28853 
28854   IX86_BUILTIN_MOVMSKPD256,
28855   IX86_BUILTIN_MOVMSKPS256,
28856 
28857   /* AVX2 */
28858   IX86_BUILTIN_MPSADBW256,
28859   IX86_BUILTIN_PABSB256,
28860   IX86_BUILTIN_PABSW256,
28861   IX86_BUILTIN_PABSD256,
28862   IX86_BUILTIN_PACKSSDW256,
28863   IX86_BUILTIN_PACKSSWB256,
28864   IX86_BUILTIN_PACKUSDW256,
28865   IX86_BUILTIN_PACKUSWB256,
28866   IX86_BUILTIN_PADDB256,
28867   IX86_BUILTIN_PADDW256,
28868   IX86_BUILTIN_PADDD256,
28869   IX86_BUILTIN_PADDQ256,
28870   IX86_BUILTIN_PADDSB256,
28871   IX86_BUILTIN_PADDSW256,
28872   IX86_BUILTIN_PADDUSB256,
28873   IX86_BUILTIN_PADDUSW256,
28874   IX86_BUILTIN_PALIGNR256,
28875   IX86_BUILTIN_AND256I,
28876   IX86_BUILTIN_ANDNOT256I,
28877   IX86_BUILTIN_PAVGB256,
28878   IX86_BUILTIN_PAVGW256,
28879   IX86_BUILTIN_PBLENDVB256,
28880   IX86_BUILTIN_PBLENDVW256,
28881   IX86_BUILTIN_PCMPEQB256,
28882   IX86_BUILTIN_PCMPEQW256,
28883   IX86_BUILTIN_PCMPEQD256,
28884   IX86_BUILTIN_PCMPEQQ256,
28885   IX86_BUILTIN_PCMPGTB256,
28886   IX86_BUILTIN_PCMPGTW256,
28887   IX86_BUILTIN_PCMPGTD256,
28888   IX86_BUILTIN_PCMPGTQ256,
28889   IX86_BUILTIN_PHADDW256,
28890   IX86_BUILTIN_PHADDD256,
28891   IX86_BUILTIN_PHADDSW256,
28892   IX86_BUILTIN_PHSUBW256,
28893   IX86_BUILTIN_PHSUBD256,
28894   IX86_BUILTIN_PHSUBSW256,
28895   IX86_BUILTIN_PMADDUBSW256,
28896   IX86_BUILTIN_PMADDWD256,
28897   IX86_BUILTIN_PMAXSB256,
28898   IX86_BUILTIN_PMAXSW256,
28899   IX86_BUILTIN_PMAXSD256,
28900   IX86_BUILTIN_PMAXUB256,
28901   IX86_BUILTIN_PMAXUW256,
28902   IX86_BUILTIN_PMAXUD256,
28903   IX86_BUILTIN_PMINSB256,
28904   IX86_BUILTIN_PMINSW256,
28905   IX86_BUILTIN_PMINSD256,
28906   IX86_BUILTIN_PMINUB256,
28907   IX86_BUILTIN_PMINUW256,
28908   IX86_BUILTIN_PMINUD256,
28909   IX86_BUILTIN_PMOVMSKB256,
28910   IX86_BUILTIN_PMOVSXBW256,
28911   IX86_BUILTIN_PMOVSXBD256,
28912   IX86_BUILTIN_PMOVSXBQ256,
28913   IX86_BUILTIN_PMOVSXWD256,
28914   IX86_BUILTIN_PMOVSXWQ256,
28915   IX86_BUILTIN_PMOVSXDQ256,
28916   IX86_BUILTIN_PMOVZXBW256,
28917   IX86_BUILTIN_PMOVZXBD256,
28918   IX86_BUILTIN_PMOVZXBQ256,
28919   IX86_BUILTIN_PMOVZXWD256,
28920   IX86_BUILTIN_PMOVZXWQ256,
28921   IX86_BUILTIN_PMOVZXDQ256,
28922   IX86_BUILTIN_PMULDQ256,
28923   IX86_BUILTIN_PMULHRSW256,
28924   IX86_BUILTIN_PMULHUW256,
28925   IX86_BUILTIN_PMULHW256,
28926   IX86_BUILTIN_PMULLW256,
28927   IX86_BUILTIN_PMULLD256,
28928   IX86_BUILTIN_PMULUDQ256,
28929   IX86_BUILTIN_POR256,
28930   IX86_BUILTIN_PSADBW256,
28931   IX86_BUILTIN_PSHUFB256,
28932   IX86_BUILTIN_PSHUFD256,
28933   IX86_BUILTIN_PSHUFHW256,
28934   IX86_BUILTIN_PSHUFLW256,
28935   IX86_BUILTIN_PSIGNB256,
28936   IX86_BUILTIN_PSIGNW256,
28937   IX86_BUILTIN_PSIGND256,
28938   IX86_BUILTIN_PSLLDQI256,
28939   IX86_BUILTIN_PSLLWI256,
28940   IX86_BUILTIN_PSLLW256,
28941   IX86_BUILTIN_PSLLDI256,
28942   IX86_BUILTIN_PSLLD256,
28943   IX86_BUILTIN_PSLLQI256,
28944   IX86_BUILTIN_PSLLQ256,
28945   IX86_BUILTIN_PSRAWI256,
28946   IX86_BUILTIN_PSRAW256,
28947   IX86_BUILTIN_PSRADI256,
28948   IX86_BUILTIN_PSRAD256,
28949   IX86_BUILTIN_PSRLDQI256,
28950   IX86_BUILTIN_PSRLWI256,
28951   IX86_BUILTIN_PSRLW256,
28952   IX86_BUILTIN_PSRLDI256,
28953   IX86_BUILTIN_PSRLD256,
28954   IX86_BUILTIN_PSRLQI256,
28955   IX86_BUILTIN_PSRLQ256,
28956   IX86_BUILTIN_PSUBB256,
28957   IX86_BUILTIN_PSUBW256,
28958   IX86_BUILTIN_PSUBD256,
28959   IX86_BUILTIN_PSUBQ256,
28960   IX86_BUILTIN_PSUBSB256,
28961   IX86_BUILTIN_PSUBSW256,
28962   IX86_BUILTIN_PSUBUSB256,
28963   IX86_BUILTIN_PSUBUSW256,
28964   IX86_BUILTIN_PUNPCKHBW256,
28965   IX86_BUILTIN_PUNPCKHWD256,
28966   IX86_BUILTIN_PUNPCKHDQ256,
28967   IX86_BUILTIN_PUNPCKHQDQ256,
28968   IX86_BUILTIN_PUNPCKLBW256,
28969   IX86_BUILTIN_PUNPCKLWD256,
28970   IX86_BUILTIN_PUNPCKLDQ256,
28971   IX86_BUILTIN_PUNPCKLQDQ256,
28972   IX86_BUILTIN_PXOR256,
28973   IX86_BUILTIN_MOVNTDQA256,
28974   IX86_BUILTIN_VBROADCASTSS_PS,
28975   IX86_BUILTIN_VBROADCASTSS_PS256,
28976   IX86_BUILTIN_VBROADCASTSD_PD256,
28977   IX86_BUILTIN_VBROADCASTSI256,
28978   IX86_BUILTIN_PBLENDD256,
28979   IX86_BUILTIN_PBLENDD128,
28980   IX86_BUILTIN_PBROADCASTB256,
28981   IX86_BUILTIN_PBROADCASTW256,
28982   IX86_BUILTIN_PBROADCASTD256,
28983   IX86_BUILTIN_PBROADCASTQ256,
28984   IX86_BUILTIN_PBROADCASTB128,
28985   IX86_BUILTIN_PBROADCASTW128,
28986   IX86_BUILTIN_PBROADCASTD128,
28987   IX86_BUILTIN_PBROADCASTQ128,
28988   IX86_BUILTIN_VPERMVARSI256,
28989   IX86_BUILTIN_VPERMDF256,
28990   IX86_BUILTIN_VPERMVARSF256,
28991   IX86_BUILTIN_VPERMDI256,
28992   IX86_BUILTIN_VPERMTI256,
28993   IX86_BUILTIN_VEXTRACT128I256,
28994   IX86_BUILTIN_VINSERT128I256,
28995   IX86_BUILTIN_MASKLOADD,
28996   IX86_BUILTIN_MASKLOADQ,
28997   IX86_BUILTIN_MASKLOADD256,
28998   IX86_BUILTIN_MASKLOADQ256,
28999   IX86_BUILTIN_MASKSTORED,
29000   IX86_BUILTIN_MASKSTOREQ,
29001   IX86_BUILTIN_MASKSTORED256,
29002   IX86_BUILTIN_MASKSTOREQ256,
29003   IX86_BUILTIN_PSLLVV4DI,
29004   IX86_BUILTIN_PSLLVV2DI,
29005   IX86_BUILTIN_PSLLVV8SI,
29006   IX86_BUILTIN_PSLLVV4SI,
29007   IX86_BUILTIN_PSRAVV8SI,
29008   IX86_BUILTIN_PSRAVV4SI,
29009   IX86_BUILTIN_PSRLVV4DI,
29010   IX86_BUILTIN_PSRLVV2DI,
29011   IX86_BUILTIN_PSRLVV8SI,
29012   IX86_BUILTIN_PSRLVV4SI,
29013 
29014   IX86_BUILTIN_GATHERSIV2DF,
29015   IX86_BUILTIN_GATHERSIV4DF,
29016   IX86_BUILTIN_GATHERDIV2DF,
29017   IX86_BUILTIN_GATHERDIV4DF,
29018   IX86_BUILTIN_GATHERSIV4SF,
29019   IX86_BUILTIN_GATHERSIV8SF,
29020   IX86_BUILTIN_GATHERDIV4SF,
29021   IX86_BUILTIN_GATHERDIV8SF,
29022   IX86_BUILTIN_GATHERSIV2DI,
29023   IX86_BUILTIN_GATHERSIV4DI,
29024   IX86_BUILTIN_GATHERDIV2DI,
29025   IX86_BUILTIN_GATHERDIV4DI,
29026   IX86_BUILTIN_GATHERSIV4SI,
29027   IX86_BUILTIN_GATHERSIV8SI,
29028   IX86_BUILTIN_GATHERDIV4SI,
29029   IX86_BUILTIN_GATHERDIV8SI,
29030 
29031   /* AVX512F */
29032   IX86_BUILTIN_SI512_SI256,
29033   IX86_BUILTIN_PD512_PD256,
29034   IX86_BUILTIN_PS512_PS256,
29035   IX86_BUILTIN_SI512_SI,
29036   IX86_BUILTIN_PD512_PD,
29037   IX86_BUILTIN_PS512_PS,
29038   IX86_BUILTIN_ADDPD512,
29039   IX86_BUILTIN_ADDPS512,
29040   IX86_BUILTIN_ADDSD_ROUND,
29041   IX86_BUILTIN_ADDSS_ROUND,
29042   IX86_BUILTIN_ALIGND512,
29043   IX86_BUILTIN_ALIGNQ512,
29044   IX86_BUILTIN_BLENDMD512,
29045   IX86_BUILTIN_BLENDMPD512,
29046   IX86_BUILTIN_BLENDMPS512,
29047   IX86_BUILTIN_BLENDMQ512,
29048   IX86_BUILTIN_BROADCASTF32X4_512,
29049   IX86_BUILTIN_BROADCASTF64X4_512,
29050   IX86_BUILTIN_BROADCASTI32X4_512,
29051   IX86_BUILTIN_BROADCASTI64X4_512,
29052   IX86_BUILTIN_BROADCASTSD512,
29053   IX86_BUILTIN_BROADCASTSS512,
29054   IX86_BUILTIN_CMPD512,
29055   IX86_BUILTIN_CMPPD512,
29056   IX86_BUILTIN_CMPPS512,
29057   IX86_BUILTIN_CMPQ512,
29058   IX86_BUILTIN_CMPSD_MASK,
29059   IX86_BUILTIN_CMPSS_MASK,
29060   IX86_BUILTIN_COMIDF,
29061   IX86_BUILTIN_COMISF,
29062   IX86_BUILTIN_COMPRESSPD512,
29063   IX86_BUILTIN_COMPRESSPDSTORE512,
29064   IX86_BUILTIN_COMPRESSPS512,
29065   IX86_BUILTIN_COMPRESSPSSTORE512,
29066   IX86_BUILTIN_CVTDQ2PD512,
29067   IX86_BUILTIN_CVTDQ2PS512,
29068   IX86_BUILTIN_CVTPD2DQ512,
29069   IX86_BUILTIN_CVTPD2PS512,
29070   IX86_BUILTIN_CVTPD2UDQ512,
29071   IX86_BUILTIN_CVTPH2PS512,
29072   IX86_BUILTIN_CVTPS2DQ512,
29073   IX86_BUILTIN_CVTPS2PD512,
29074   IX86_BUILTIN_CVTPS2PH512,
29075   IX86_BUILTIN_CVTPS2UDQ512,
29076   IX86_BUILTIN_CVTSD2SS_ROUND,
29077   IX86_BUILTIN_CVTSI2SD64,
29078   IX86_BUILTIN_CVTSI2SS32,
29079   IX86_BUILTIN_CVTSI2SS64,
29080   IX86_BUILTIN_CVTSS2SD_ROUND,
29081   IX86_BUILTIN_CVTTPD2DQ512,
29082   IX86_BUILTIN_CVTTPD2UDQ512,
29083   IX86_BUILTIN_CVTTPS2DQ512,
29084   IX86_BUILTIN_CVTTPS2UDQ512,
29085   IX86_BUILTIN_CVTUDQ2PD512,
29086   IX86_BUILTIN_CVTUDQ2PS512,
29087   IX86_BUILTIN_CVTUSI2SD32,
29088   IX86_BUILTIN_CVTUSI2SD64,
29089   IX86_BUILTIN_CVTUSI2SS32,
29090   IX86_BUILTIN_CVTUSI2SS64,
29091   IX86_BUILTIN_DIVPD512,
29092   IX86_BUILTIN_DIVPS512,
29093   IX86_BUILTIN_DIVSD_ROUND,
29094   IX86_BUILTIN_DIVSS_ROUND,
29095   IX86_BUILTIN_EXPANDPD512,
29096   IX86_BUILTIN_EXPANDPD512Z,
29097   IX86_BUILTIN_EXPANDPDLOAD512,
29098   IX86_BUILTIN_EXPANDPDLOAD512Z,
29099   IX86_BUILTIN_EXPANDPS512,
29100   IX86_BUILTIN_EXPANDPS512Z,
29101   IX86_BUILTIN_EXPANDPSLOAD512,
29102   IX86_BUILTIN_EXPANDPSLOAD512Z,
29103   IX86_BUILTIN_EXTRACTF32X4,
29104   IX86_BUILTIN_EXTRACTF64X4,
29105   IX86_BUILTIN_EXTRACTI32X4,
29106   IX86_BUILTIN_EXTRACTI64X4,
29107   IX86_BUILTIN_FIXUPIMMPD512_MASK,
29108   IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
29109   IX86_BUILTIN_FIXUPIMMPS512_MASK,
29110   IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
29111   IX86_BUILTIN_FIXUPIMMSD128_MASK,
29112   IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
29113   IX86_BUILTIN_FIXUPIMMSS128_MASK,
29114   IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
29115   IX86_BUILTIN_GETEXPPD512,
29116   IX86_BUILTIN_GETEXPPS512,
29117   IX86_BUILTIN_GETEXPSD128,
29118   IX86_BUILTIN_GETEXPSS128,
29119   IX86_BUILTIN_GETMANTPD512,
29120   IX86_BUILTIN_GETMANTPS512,
29121   IX86_BUILTIN_GETMANTSD128,
29122   IX86_BUILTIN_GETMANTSS128,
29123   IX86_BUILTIN_INSERTF32X4,
29124   IX86_BUILTIN_INSERTF64X4,
29125   IX86_BUILTIN_INSERTI32X4,
29126   IX86_BUILTIN_INSERTI64X4,
29127   IX86_BUILTIN_LOADAPD512,
29128   IX86_BUILTIN_LOADAPS512,
29129   IX86_BUILTIN_LOADDQUDI512,
29130   IX86_BUILTIN_LOADDQUSI512,
29131   IX86_BUILTIN_LOADUPD512,
29132   IX86_BUILTIN_LOADUPS512,
29133   IX86_BUILTIN_MAXPD512,
29134   IX86_BUILTIN_MAXPS512,
29135   IX86_BUILTIN_MAXSD_ROUND,
29136   IX86_BUILTIN_MAXSS_ROUND,
29137   IX86_BUILTIN_MINPD512,
29138   IX86_BUILTIN_MINPS512,
29139   IX86_BUILTIN_MINSD_ROUND,
29140   IX86_BUILTIN_MINSS_ROUND,
29141   IX86_BUILTIN_MOVAPD512,
29142   IX86_BUILTIN_MOVAPS512,
29143   IX86_BUILTIN_MOVDDUP512,
29144   IX86_BUILTIN_MOVDQA32LOAD512,
29145   IX86_BUILTIN_MOVDQA32STORE512,
29146   IX86_BUILTIN_MOVDQA32_512,
29147   IX86_BUILTIN_MOVDQA64LOAD512,
29148   IX86_BUILTIN_MOVDQA64STORE512,
29149   IX86_BUILTIN_MOVDQA64_512,
29150   IX86_BUILTIN_MOVNTDQ512,
29151   IX86_BUILTIN_MOVNTDQA512,
29152   IX86_BUILTIN_MOVNTPD512,
29153   IX86_BUILTIN_MOVNTPS512,
29154   IX86_BUILTIN_MOVSHDUP512,
29155   IX86_BUILTIN_MOVSLDUP512,
29156   IX86_BUILTIN_MULPD512,
29157   IX86_BUILTIN_MULPS512,
29158   IX86_BUILTIN_MULSD_ROUND,
29159   IX86_BUILTIN_MULSS_ROUND,
29160   IX86_BUILTIN_PABSD512,
29161   IX86_BUILTIN_PABSQ512,
29162   IX86_BUILTIN_PADDD512,
29163   IX86_BUILTIN_PADDQ512,
29164   IX86_BUILTIN_PANDD512,
29165   IX86_BUILTIN_PANDND512,
29166   IX86_BUILTIN_PANDNQ512,
29167   IX86_BUILTIN_PANDQ512,
29168   IX86_BUILTIN_PBROADCASTD512,
29169   IX86_BUILTIN_PBROADCASTD512_GPR,
29170   IX86_BUILTIN_PBROADCASTMB512,
29171   IX86_BUILTIN_PBROADCASTMW512,
29172   IX86_BUILTIN_PBROADCASTQ512,
29173   IX86_BUILTIN_PBROADCASTQ512_GPR,
29174   IX86_BUILTIN_PCMPEQD512_MASK,
29175   IX86_BUILTIN_PCMPEQQ512_MASK,
29176   IX86_BUILTIN_PCMPGTD512_MASK,
29177   IX86_BUILTIN_PCMPGTQ512_MASK,
29178   IX86_BUILTIN_PCOMPRESSD512,
29179   IX86_BUILTIN_PCOMPRESSDSTORE512,
29180   IX86_BUILTIN_PCOMPRESSQ512,
29181   IX86_BUILTIN_PCOMPRESSQSTORE512,
29182   IX86_BUILTIN_PEXPANDD512,
29183   IX86_BUILTIN_PEXPANDD512Z,
29184   IX86_BUILTIN_PEXPANDDLOAD512,
29185   IX86_BUILTIN_PEXPANDDLOAD512Z,
29186   IX86_BUILTIN_PEXPANDQ512,
29187   IX86_BUILTIN_PEXPANDQ512Z,
29188   IX86_BUILTIN_PEXPANDQLOAD512,
29189   IX86_BUILTIN_PEXPANDQLOAD512Z,
29190   IX86_BUILTIN_PMAXSD512,
29191   IX86_BUILTIN_PMAXSQ512,
29192   IX86_BUILTIN_PMAXUD512,
29193   IX86_BUILTIN_PMAXUQ512,
29194   IX86_BUILTIN_PMINSD512,
29195   IX86_BUILTIN_PMINSQ512,
29196   IX86_BUILTIN_PMINUD512,
29197   IX86_BUILTIN_PMINUQ512,
29198   IX86_BUILTIN_PMOVDB512,
29199   IX86_BUILTIN_PMOVDB512_MEM,
29200   IX86_BUILTIN_PMOVDW512,
29201   IX86_BUILTIN_PMOVDW512_MEM,
29202   IX86_BUILTIN_PMOVQB512,
29203   IX86_BUILTIN_PMOVQB512_MEM,
29204   IX86_BUILTIN_PMOVQD512,
29205   IX86_BUILTIN_PMOVQD512_MEM,
29206   IX86_BUILTIN_PMOVQW512,
29207   IX86_BUILTIN_PMOVQW512_MEM,
29208   IX86_BUILTIN_PMOVSDB512,
29209   IX86_BUILTIN_PMOVSDB512_MEM,
29210   IX86_BUILTIN_PMOVSDW512,
29211   IX86_BUILTIN_PMOVSDW512_MEM,
29212   IX86_BUILTIN_PMOVSQB512,
29213   IX86_BUILTIN_PMOVSQB512_MEM,
29214   IX86_BUILTIN_PMOVSQD512,
29215   IX86_BUILTIN_PMOVSQD512_MEM,
29216   IX86_BUILTIN_PMOVSQW512,
29217   IX86_BUILTIN_PMOVSQW512_MEM,
29218   IX86_BUILTIN_PMOVSXBD512,
29219   IX86_BUILTIN_PMOVSXBQ512,
29220   IX86_BUILTIN_PMOVSXDQ512,
29221   IX86_BUILTIN_PMOVSXWD512,
29222   IX86_BUILTIN_PMOVSXWQ512,
29223   IX86_BUILTIN_PMOVUSDB512,
29224   IX86_BUILTIN_PMOVUSDB512_MEM,
29225   IX86_BUILTIN_PMOVUSDW512,
29226   IX86_BUILTIN_PMOVUSDW512_MEM,
29227   IX86_BUILTIN_PMOVUSQB512,
29228   IX86_BUILTIN_PMOVUSQB512_MEM,
29229   IX86_BUILTIN_PMOVUSQD512,
29230   IX86_BUILTIN_PMOVUSQD512_MEM,
29231   IX86_BUILTIN_PMOVUSQW512,
29232   IX86_BUILTIN_PMOVUSQW512_MEM,
29233   IX86_BUILTIN_PMOVZXBD512,
29234   IX86_BUILTIN_PMOVZXBQ512,
29235   IX86_BUILTIN_PMOVZXDQ512,
29236   IX86_BUILTIN_PMOVZXWD512,
29237   IX86_BUILTIN_PMOVZXWQ512,
29238   IX86_BUILTIN_PMULDQ512,
29239   IX86_BUILTIN_PMULLD512,
29240   IX86_BUILTIN_PMULUDQ512,
29241   IX86_BUILTIN_PORD512,
29242   IX86_BUILTIN_PORQ512,
29243   IX86_BUILTIN_PROLD512,
29244   IX86_BUILTIN_PROLQ512,
29245   IX86_BUILTIN_PROLVD512,
29246   IX86_BUILTIN_PROLVQ512,
29247   IX86_BUILTIN_PRORD512,
29248   IX86_BUILTIN_PRORQ512,
29249   IX86_BUILTIN_PRORVD512,
29250   IX86_BUILTIN_PRORVQ512,
29251   IX86_BUILTIN_PSHUFD512,
29252   IX86_BUILTIN_PSLLD512,
29253   IX86_BUILTIN_PSLLDI512,
29254   IX86_BUILTIN_PSLLQ512,
29255   IX86_BUILTIN_PSLLQI512,
29256   IX86_BUILTIN_PSLLVV16SI,
29257   IX86_BUILTIN_PSLLVV8DI,
29258   IX86_BUILTIN_PSRAD512,
29259   IX86_BUILTIN_PSRADI512,
29260   IX86_BUILTIN_PSRAQ512,
29261   IX86_BUILTIN_PSRAQI512,
29262   IX86_BUILTIN_PSRAVV16SI,
29263   IX86_BUILTIN_PSRAVV8DI,
29264   IX86_BUILTIN_PSRLD512,
29265   IX86_BUILTIN_PSRLDI512,
29266   IX86_BUILTIN_PSRLQ512,
29267   IX86_BUILTIN_PSRLQI512,
29268   IX86_BUILTIN_PSRLVV16SI,
29269   IX86_BUILTIN_PSRLVV8DI,
29270   IX86_BUILTIN_PSUBD512,
29271   IX86_BUILTIN_PSUBQ512,
29272   IX86_BUILTIN_PTESTMD512,
29273   IX86_BUILTIN_PTESTMQ512,
29274   IX86_BUILTIN_PTESTNMD512,
29275   IX86_BUILTIN_PTESTNMQ512,
29276   IX86_BUILTIN_PUNPCKHDQ512,
29277   IX86_BUILTIN_PUNPCKHQDQ512,
29278   IX86_BUILTIN_PUNPCKLDQ512,
29279   IX86_BUILTIN_PUNPCKLQDQ512,
29280   IX86_BUILTIN_PXORD512,
29281   IX86_BUILTIN_PXORQ512,
29282   IX86_BUILTIN_RCP14PD512,
29283   IX86_BUILTIN_RCP14PS512,
29284   IX86_BUILTIN_RCP14SD,
29285   IX86_BUILTIN_RCP14SS,
29286   IX86_BUILTIN_RNDSCALEPD,
29287   IX86_BUILTIN_RNDSCALEPS,
29288   IX86_BUILTIN_RNDSCALESD,
29289   IX86_BUILTIN_RNDSCALESS,
29290   IX86_BUILTIN_RSQRT14PD512,
29291   IX86_BUILTIN_RSQRT14PS512,
29292   IX86_BUILTIN_RSQRT14SD,
29293   IX86_BUILTIN_RSQRT14SS,
29294   IX86_BUILTIN_SCALEFPD512,
29295   IX86_BUILTIN_SCALEFPS512,
29296   IX86_BUILTIN_SCALEFSD,
29297   IX86_BUILTIN_SCALEFSS,
29298   IX86_BUILTIN_SHUFPD512,
29299   IX86_BUILTIN_SHUFPS512,
29300   IX86_BUILTIN_SHUF_F32x4,
29301   IX86_BUILTIN_SHUF_F64x2,
29302   IX86_BUILTIN_SHUF_I32x4,
29303   IX86_BUILTIN_SHUF_I64x2,
29304   IX86_BUILTIN_SQRTPD512,
29305   IX86_BUILTIN_SQRTPD512_MASK,
29306   IX86_BUILTIN_SQRTPS512_MASK,
29307   IX86_BUILTIN_SQRTPS_NR512,
29308   IX86_BUILTIN_SQRTSD_ROUND,
29309   IX86_BUILTIN_SQRTSS_ROUND,
29310   IX86_BUILTIN_STOREAPD512,
29311   IX86_BUILTIN_STOREAPS512,
29312   IX86_BUILTIN_STOREDQUDI512,
29313   IX86_BUILTIN_STOREDQUSI512,
29314   IX86_BUILTIN_STOREUPD512,
29315   IX86_BUILTIN_STOREUPS512,
29316   IX86_BUILTIN_SUBPD512,
29317   IX86_BUILTIN_SUBPS512,
29318   IX86_BUILTIN_SUBSD_ROUND,
29319   IX86_BUILTIN_SUBSS_ROUND,
29320   IX86_BUILTIN_UCMPD512,
29321   IX86_BUILTIN_UCMPQ512,
29322   IX86_BUILTIN_UNPCKHPD512,
29323   IX86_BUILTIN_UNPCKHPS512,
29324   IX86_BUILTIN_UNPCKLPD512,
29325   IX86_BUILTIN_UNPCKLPS512,
29326   IX86_BUILTIN_VCVTSD2SI32,
29327   IX86_BUILTIN_VCVTSD2SI64,
29328   IX86_BUILTIN_VCVTSD2USI32,
29329   IX86_BUILTIN_VCVTSD2USI64,
29330   IX86_BUILTIN_VCVTSS2SI32,
29331   IX86_BUILTIN_VCVTSS2SI64,
29332   IX86_BUILTIN_VCVTSS2USI32,
29333   IX86_BUILTIN_VCVTSS2USI64,
29334   IX86_BUILTIN_VCVTTSD2SI32,
29335   IX86_BUILTIN_VCVTTSD2SI64,
29336   IX86_BUILTIN_VCVTTSD2USI32,
29337   IX86_BUILTIN_VCVTTSD2USI64,
29338   IX86_BUILTIN_VCVTTSS2SI32,
29339   IX86_BUILTIN_VCVTTSS2SI64,
29340   IX86_BUILTIN_VCVTTSS2USI32,
29341   IX86_BUILTIN_VCVTTSS2USI64,
29342   IX86_BUILTIN_VFMADDPD512_MASK,
29343   IX86_BUILTIN_VFMADDPD512_MASK3,
29344   IX86_BUILTIN_VFMADDPD512_MASKZ,
29345   IX86_BUILTIN_VFMADDPS512_MASK,
29346   IX86_BUILTIN_VFMADDPS512_MASK3,
29347   IX86_BUILTIN_VFMADDPS512_MASKZ,
29348   IX86_BUILTIN_VFMADDSD3_ROUND,
29349   IX86_BUILTIN_VFMADDSS3_ROUND,
29350   IX86_BUILTIN_VFMADDSUBPD512_MASK,
29351   IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29352   IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29353   IX86_BUILTIN_VFMADDSUBPS512_MASK,
29354   IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29355   IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29356   IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29357   IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29358   IX86_BUILTIN_VFMSUBPD512_MASK3,
29359   IX86_BUILTIN_VFMSUBPS512_MASK3,
29360   IX86_BUILTIN_VFMSUBSD3_MASK3,
29361   IX86_BUILTIN_VFMSUBSS3_MASK3,
29362   IX86_BUILTIN_VFNMADDPD512_MASK,
29363   IX86_BUILTIN_VFNMADDPS512_MASK,
29364   IX86_BUILTIN_VFNMSUBPD512_MASK,
29365   IX86_BUILTIN_VFNMSUBPD512_MASK3,
29366   IX86_BUILTIN_VFNMSUBPS512_MASK,
29367   IX86_BUILTIN_VFNMSUBPS512_MASK3,
29368   IX86_BUILTIN_VPCLZCNTD512,
29369   IX86_BUILTIN_VPCLZCNTQ512,
29370   IX86_BUILTIN_VPCONFLICTD512,
29371   IX86_BUILTIN_VPCONFLICTQ512,
29372   IX86_BUILTIN_VPERMDF512,
29373   IX86_BUILTIN_VPERMDI512,
29374   IX86_BUILTIN_VPERMI2VARD512,
29375   IX86_BUILTIN_VPERMI2VARPD512,
29376   IX86_BUILTIN_VPERMI2VARPS512,
29377   IX86_BUILTIN_VPERMI2VARQ512,
29378   IX86_BUILTIN_VPERMILPD512,
29379   IX86_BUILTIN_VPERMILPS512,
29380   IX86_BUILTIN_VPERMILVARPD512,
29381   IX86_BUILTIN_VPERMILVARPS512,
29382   IX86_BUILTIN_VPERMT2VARD512,
29383   IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29384   IX86_BUILTIN_VPERMT2VARPD512,
29385   IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29386   IX86_BUILTIN_VPERMT2VARPS512,
29387   IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29388   IX86_BUILTIN_VPERMT2VARQ512,
29389   IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29390   IX86_BUILTIN_VPERMVARDF512,
29391   IX86_BUILTIN_VPERMVARDI512,
29392   IX86_BUILTIN_VPERMVARSF512,
29393   IX86_BUILTIN_VPERMVARSI512,
29394   IX86_BUILTIN_VTERNLOGD512_MASK,
29395   IX86_BUILTIN_VTERNLOGD512_MASKZ,
29396   IX86_BUILTIN_VTERNLOGQ512_MASK,
29397   IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29398 
29399   /* Mask arithmetic operations */
29400   IX86_BUILTIN_KAND16,
29401   IX86_BUILTIN_KANDN16,
29402   IX86_BUILTIN_KNOT16,
29403   IX86_BUILTIN_KOR16,
29404   IX86_BUILTIN_KORTESTC16,
29405   IX86_BUILTIN_KORTESTZ16,
29406   IX86_BUILTIN_KUNPCKBW,
29407   IX86_BUILTIN_KXNOR16,
29408   IX86_BUILTIN_KXOR16,
29409   IX86_BUILTIN_KMOV16,
29410 
29411   /* AVX512VL.  */
29412   IX86_BUILTIN_PMOVUSQD256_MEM,
29413   IX86_BUILTIN_PMOVUSQD128_MEM,
29414   IX86_BUILTIN_PMOVSQD256_MEM,
29415   IX86_BUILTIN_PMOVSQD128_MEM,
29416   IX86_BUILTIN_PMOVQD256_MEM,
29417   IX86_BUILTIN_PMOVQD128_MEM,
29418   IX86_BUILTIN_PMOVUSQW256_MEM,
29419   IX86_BUILTIN_PMOVUSQW128_MEM,
29420   IX86_BUILTIN_PMOVSQW256_MEM,
29421   IX86_BUILTIN_PMOVSQW128_MEM,
29422   IX86_BUILTIN_PMOVQW256_MEM,
29423   IX86_BUILTIN_PMOVQW128_MEM,
29424   IX86_BUILTIN_PMOVUSQB256_MEM,
29425   IX86_BUILTIN_PMOVUSQB128_MEM,
29426   IX86_BUILTIN_PMOVSQB256_MEM,
29427   IX86_BUILTIN_PMOVSQB128_MEM,
29428   IX86_BUILTIN_PMOVQB256_MEM,
29429   IX86_BUILTIN_PMOVQB128_MEM,
29430   IX86_BUILTIN_PMOVUSDW256_MEM,
29431   IX86_BUILTIN_PMOVUSDW128_MEM,
29432   IX86_BUILTIN_PMOVSDW256_MEM,
29433   IX86_BUILTIN_PMOVSDW128_MEM,
29434   IX86_BUILTIN_PMOVDW256_MEM,
29435   IX86_BUILTIN_PMOVDW128_MEM,
29436   IX86_BUILTIN_PMOVUSDB256_MEM,
29437   IX86_BUILTIN_PMOVUSDB128_MEM,
29438   IX86_BUILTIN_PMOVSDB256_MEM,
29439   IX86_BUILTIN_PMOVSDB128_MEM,
29440   IX86_BUILTIN_PMOVDB256_MEM,
29441   IX86_BUILTIN_PMOVDB128_MEM,
29442   IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29443   IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29444   IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29445   IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29446   IX86_BUILTIN_MOVDQA64STORE256_MASK,
29447   IX86_BUILTIN_MOVDQA64STORE128_MASK,
29448   IX86_BUILTIN_MOVDQA32STORE256_MASK,
29449   IX86_BUILTIN_MOVDQA32STORE128_MASK,
29450   IX86_BUILTIN_LOADAPD256_MASK,
29451   IX86_BUILTIN_LOADAPD128_MASK,
29452   IX86_BUILTIN_LOADAPS256_MASK,
29453   IX86_BUILTIN_LOADAPS128_MASK,
29454   IX86_BUILTIN_STOREAPD256_MASK,
29455   IX86_BUILTIN_STOREAPD128_MASK,
29456   IX86_BUILTIN_STOREAPS256_MASK,
29457   IX86_BUILTIN_STOREAPS128_MASK,
29458   IX86_BUILTIN_LOADUPD256_MASK,
29459   IX86_BUILTIN_LOADUPD128_MASK,
29460   IX86_BUILTIN_LOADUPS256_MASK,
29461   IX86_BUILTIN_LOADUPS128_MASK,
29462   IX86_BUILTIN_STOREUPD256_MASK,
29463   IX86_BUILTIN_STOREUPD128_MASK,
29464   IX86_BUILTIN_STOREUPS256_MASK,
29465   IX86_BUILTIN_STOREUPS128_MASK,
29466   IX86_BUILTIN_LOADDQUDI256_MASK,
29467   IX86_BUILTIN_LOADDQUDI128_MASK,
29468   IX86_BUILTIN_LOADDQUSI256_MASK,
29469   IX86_BUILTIN_LOADDQUSI128_MASK,
29470   IX86_BUILTIN_LOADDQUHI256_MASK,
29471   IX86_BUILTIN_LOADDQUHI128_MASK,
29472   IX86_BUILTIN_LOADDQUQI256_MASK,
29473   IX86_BUILTIN_LOADDQUQI128_MASK,
29474   IX86_BUILTIN_STOREDQUDI256_MASK,
29475   IX86_BUILTIN_STOREDQUDI128_MASK,
29476   IX86_BUILTIN_STOREDQUSI256_MASK,
29477   IX86_BUILTIN_STOREDQUSI128_MASK,
29478   IX86_BUILTIN_STOREDQUHI256_MASK,
29479   IX86_BUILTIN_STOREDQUHI128_MASK,
29480   IX86_BUILTIN_STOREDQUQI256_MASK,
29481   IX86_BUILTIN_STOREDQUQI128_MASK,
29482   IX86_BUILTIN_COMPRESSPDSTORE256,
29483   IX86_BUILTIN_COMPRESSPDSTORE128,
29484   IX86_BUILTIN_COMPRESSPSSTORE256,
29485   IX86_BUILTIN_COMPRESSPSSTORE128,
29486   IX86_BUILTIN_PCOMPRESSQSTORE256,
29487   IX86_BUILTIN_PCOMPRESSQSTORE128,
29488   IX86_BUILTIN_PCOMPRESSDSTORE256,
29489   IX86_BUILTIN_PCOMPRESSDSTORE128,
29490   IX86_BUILTIN_EXPANDPDLOAD256,
29491   IX86_BUILTIN_EXPANDPDLOAD128,
29492   IX86_BUILTIN_EXPANDPSLOAD256,
29493   IX86_BUILTIN_EXPANDPSLOAD128,
29494   IX86_BUILTIN_PEXPANDQLOAD256,
29495   IX86_BUILTIN_PEXPANDQLOAD128,
29496   IX86_BUILTIN_PEXPANDDLOAD256,
29497   IX86_BUILTIN_PEXPANDDLOAD128,
29498   IX86_BUILTIN_EXPANDPDLOAD256Z,
29499   IX86_BUILTIN_EXPANDPDLOAD128Z,
29500   IX86_BUILTIN_EXPANDPSLOAD256Z,
29501   IX86_BUILTIN_EXPANDPSLOAD128Z,
29502   IX86_BUILTIN_PEXPANDQLOAD256Z,
29503   IX86_BUILTIN_PEXPANDQLOAD128Z,
29504   IX86_BUILTIN_PEXPANDDLOAD256Z,
29505   IX86_BUILTIN_PEXPANDDLOAD128Z,
29506   IX86_BUILTIN_PALIGNR256_MASK,
29507   IX86_BUILTIN_PALIGNR128_MASK,
29508   IX86_BUILTIN_MOVDQA64_256_MASK,
29509   IX86_BUILTIN_MOVDQA64_128_MASK,
29510   IX86_BUILTIN_MOVDQA32_256_MASK,
29511   IX86_BUILTIN_MOVDQA32_128_MASK,
29512   IX86_BUILTIN_MOVAPD256_MASK,
29513   IX86_BUILTIN_MOVAPD128_MASK,
29514   IX86_BUILTIN_MOVAPS256_MASK,
29515   IX86_BUILTIN_MOVAPS128_MASK,
29516   IX86_BUILTIN_MOVDQUHI256_MASK,
29517   IX86_BUILTIN_MOVDQUHI128_MASK,
29518   IX86_BUILTIN_MOVDQUQI256_MASK,
29519   IX86_BUILTIN_MOVDQUQI128_MASK,
29520   IX86_BUILTIN_MINPS128_MASK,
29521   IX86_BUILTIN_MAXPS128_MASK,
29522   IX86_BUILTIN_MINPD128_MASK,
29523   IX86_BUILTIN_MAXPD128_MASK,
29524   IX86_BUILTIN_MAXPD256_MASK,
29525   IX86_BUILTIN_MAXPS256_MASK,
29526   IX86_BUILTIN_MINPD256_MASK,
29527   IX86_BUILTIN_MINPS256_MASK,
29528   IX86_BUILTIN_MULPS128_MASK,
29529   IX86_BUILTIN_DIVPS128_MASK,
29530   IX86_BUILTIN_MULPD128_MASK,
29531   IX86_BUILTIN_DIVPD128_MASK,
29532   IX86_BUILTIN_DIVPD256_MASK,
29533   IX86_BUILTIN_DIVPS256_MASK,
29534   IX86_BUILTIN_MULPD256_MASK,
29535   IX86_BUILTIN_MULPS256_MASK,
29536   IX86_BUILTIN_ADDPD128_MASK,
29537   IX86_BUILTIN_ADDPD256_MASK,
29538   IX86_BUILTIN_ADDPS128_MASK,
29539   IX86_BUILTIN_ADDPS256_MASK,
29540   IX86_BUILTIN_SUBPD128_MASK,
29541   IX86_BUILTIN_SUBPD256_MASK,
29542   IX86_BUILTIN_SUBPS128_MASK,
29543   IX86_BUILTIN_SUBPS256_MASK,
29544   IX86_BUILTIN_XORPD256_MASK,
29545   IX86_BUILTIN_XORPD128_MASK,
29546   IX86_BUILTIN_XORPS256_MASK,
29547   IX86_BUILTIN_XORPS128_MASK,
29548   IX86_BUILTIN_ORPD256_MASK,
29549   IX86_BUILTIN_ORPD128_MASK,
29550   IX86_BUILTIN_ORPS256_MASK,
29551   IX86_BUILTIN_ORPS128_MASK,
29552   IX86_BUILTIN_BROADCASTF32x2_256,
29553   IX86_BUILTIN_BROADCASTI32x2_256,
29554   IX86_BUILTIN_BROADCASTI32x2_128,
29555   IX86_BUILTIN_BROADCASTF64X2_256,
29556   IX86_BUILTIN_BROADCASTI64X2_256,
29557   IX86_BUILTIN_BROADCASTF32X4_256,
29558   IX86_BUILTIN_BROADCASTI32X4_256,
29559   IX86_BUILTIN_EXTRACTF32X4_256,
29560   IX86_BUILTIN_EXTRACTI32X4_256,
29561   IX86_BUILTIN_DBPSADBW256,
29562   IX86_BUILTIN_DBPSADBW128,
29563   IX86_BUILTIN_CVTTPD2QQ256,
29564   IX86_BUILTIN_CVTTPD2QQ128,
29565   IX86_BUILTIN_CVTTPD2UQQ256,
29566   IX86_BUILTIN_CVTTPD2UQQ128,
29567   IX86_BUILTIN_CVTPD2QQ256,
29568   IX86_BUILTIN_CVTPD2QQ128,
29569   IX86_BUILTIN_CVTPD2UQQ256,
29570   IX86_BUILTIN_CVTPD2UQQ128,
29571   IX86_BUILTIN_CVTPD2UDQ256_MASK,
29572   IX86_BUILTIN_CVTPD2UDQ128_MASK,
29573   IX86_BUILTIN_CVTTPS2QQ256,
29574   IX86_BUILTIN_CVTTPS2QQ128,
29575   IX86_BUILTIN_CVTTPS2UQQ256,
29576   IX86_BUILTIN_CVTTPS2UQQ128,
29577   IX86_BUILTIN_CVTTPS2DQ256_MASK,
29578   IX86_BUILTIN_CVTTPS2DQ128_MASK,
29579   IX86_BUILTIN_CVTTPS2UDQ256,
29580   IX86_BUILTIN_CVTTPS2UDQ128,
29581   IX86_BUILTIN_CVTTPD2DQ256_MASK,
29582   IX86_BUILTIN_CVTTPD2DQ128_MASK,
29583   IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29584   IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29585   IX86_BUILTIN_CVTPD2DQ256_MASK,
29586   IX86_BUILTIN_CVTPD2DQ128_MASK,
29587   IX86_BUILTIN_CVTDQ2PD256_MASK,
29588   IX86_BUILTIN_CVTDQ2PD128_MASK,
29589   IX86_BUILTIN_CVTUDQ2PD256_MASK,
29590   IX86_BUILTIN_CVTUDQ2PD128_MASK,
29591   IX86_BUILTIN_CVTDQ2PS256_MASK,
29592   IX86_BUILTIN_CVTDQ2PS128_MASK,
29593   IX86_BUILTIN_CVTUDQ2PS256_MASK,
29594   IX86_BUILTIN_CVTUDQ2PS128_MASK,
29595   IX86_BUILTIN_CVTPS2PD256_MASK,
29596   IX86_BUILTIN_CVTPS2PD128_MASK,
29597   IX86_BUILTIN_PBROADCASTB256_MASK,
29598   IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29599   IX86_BUILTIN_PBROADCASTB128_MASK,
29600   IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29601   IX86_BUILTIN_PBROADCASTW256_MASK,
29602   IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29603   IX86_BUILTIN_PBROADCASTW128_MASK,
29604   IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29605   IX86_BUILTIN_PBROADCASTD256_MASK,
29606   IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29607   IX86_BUILTIN_PBROADCASTD128_MASK,
29608   IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29609   IX86_BUILTIN_PBROADCASTQ256_MASK,
29610   IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29611   IX86_BUILTIN_PBROADCASTQ128_MASK,
29612   IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29613   IX86_BUILTIN_BROADCASTSS256,
29614   IX86_BUILTIN_BROADCASTSS128,
29615   IX86_BUILTIN_BROADCASTSD256,
29616   IX86_BUILTIN_EXTRACTF64X2_256,
29617   IX86_BUILTIN_EXTRACTI64X2_256,
29618   IX86_BUILTIN_INSERTF32X4_256,
29619   IX86_BUILTIN_INSERTI32X4_256,
29620   IX86_BUILTIN_PMOVSXBW256_MASK,
29621   IX86_BUILTIN_PMOVSXBW128_MASK,
29622   IX86_BUILTIN_PMOVSXBD256_MASK,
29623   IX86_BUILTIN_PMOVSXBD128_MASK,
29624   IX86_BUILTIN_PMOVSXBQ256_MASK,
29625   IX86_BUILTIN_PMOVSXBQ128_MASK,
29626   IX86_BUILTIN_PMOVSXWD256_MASK,
29627   IX86_BUILTIN_PMOVSXWD128_MASK,
29628   IX86_BUILTIN_PMOVSXWQ256_MASK,
29629   IX86_BUILTIN_PMOVSXWQ128_MASK,
29630   IX86_BUILTIN_PMOVSXDQ256_MASK,
29631   IX86_BUILTIN_PMOVSXDQ128_MASK,
29632   IX86_BUILTIN_PMOVZXBW256_MASK,
29633   IX86_BUILTIN_PMOVZXBW128_MASK,
29634   IX86_BUILTIN_PMOVZXBD256_MASK,
29635   IX86_BUILTIN_PMOVZXBD128_MASK,
29636   IX86_BUILTIN_PMOVZXBQ256_MASK,
29637   IX86_BUILTIN_PMOVZXBQ128_MASK,
29638   IX86_BUILTIN_PMOVZXWD256_MASK,
29639   IX86_BUILTIN_PMOVZXWD128_MASK,
29640   IX86_BUILTIN_PMOVZXWQ256_MASK,
29641   IX86_BUILTIN_PMOVZXWQ128_MASK,
29642   IX86_BUILTIN_PMOVZXDQ256_MASK,
29643   IX86_BUILTIN_PMOVZXDQ128_MASK,
29644   IX86_BUILTIN_REDUCEPD256_MASK,
29645   IX86_BUILTIN_REDUCEPD128_MASK,
29646   IX86_BUILTIN_REDUCEPS256_MASK,
29647   IX86_BUILTIN_REDUCEPS128_MASK,
29648   IX86_BUILTIN_REDUCESD_MASK,
29649   IX86_BUILTIN_REDUCESS_MASK,
29650   IX86_BUILTIN_VPERMVARHI256_MASK,
29651   IX86_BUILTIN_VPERMVARHI128_MASK,
29652   IX86_BUILTIN_VPERMT2VARHI256,
29653   IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29654   IX86_BUILTIN_VPERMT2VARHI128,
29655   IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29656   IX86_BUILTIN_VPERMI2VARHI256,
29657   IX86_BUILTIN_VPERMI2VARHI128,
29658   IX86_BUILTIN_RCP14PD256,
29659   IX86_BUILTIN_RCP14PD128,
29660   IX86_BUILTIN_RCP14PS256,
29661   IX86_BUILTIN_RCP14PS128,
29662   IX86_BUILTIN_RSQRT14PD256_MASK,
29663   IX86_BUILTIN_RSQRT14PD128_MASK,
29664   IX86_BUILTIN_RSQRT14PS256_MASK,
29665   IX86_BUILTIN_RSQRT14PS128_MASK,
29666   IX86_BUILTIN_SQRTPD256_MASK,
29667   IX86_BUILTIN_SQRTPD128_MASK,
29668   IX86_BUILTIN_SQRTPS256_MASK,
29669   IX86_BUILTIN_SQRTPS128_MASK,
29670   IX86_BUILTIN_PADDB128_MASK,
29671   IX86_BUILTIN_PADDW128_MASK,
29672   IX86_BUILTIN_PADDD128_MASK,
29673   IX86_BUILTIN_PADDQ128_MASK,
29674   IX86_BUILTIN_PSUBB128_MASK,
29675   IX86_BUILTIN_PSUBW128_MASK,
29676   IX86_BUILTIN_PSUBD128_MASK,
29677   IX86_BUILTIN_PSUBQ128_MASK,
29678   IX86_BUILTIN_PADDSB128_MASK,
29679   IX86_BUILTIN_PADDSW128_MASK,
29680   IX86_BUILTIN_PSUBSB128_MASK,
29681   IX86_BUILTIN_PSUBSW128_MASK,
29682   IX86_BUILTIN_PADDUSB128_MASK,
29683   IX86_BUILTIN_PADDUSW128_MASK,
29684   IX86_BUILTIN_PSUBUSB128_MASK,
29685   IX86_BUILTIN_PSUBUSW128_MASK,
29686   IX86_BUILTIN_PADDB256_MASK,
29687   IX86_BUILTIN_PADDW256_MASK,
29688   IX86_BUILTIN_PADDD256_MASK,
29689   IX86_BUILTIN_PADDQ256_MASK,
29690   IX86_BUILTIN_PADDSB256_MASK,
29691   IX86_BUILTIN_PADDSW256_MASK,
29692   IX86_BUILTIN_PADDUSB256_MASK,
29693   IX86_BUILTIN_PADDUSW256_MASK,
29694   IX86_BUILTIN_PSUBB256_MASK,
29695   IX86_BUILTIN_PSUBW256_MASK,
29696   IX86_BUILTIN_PSUBD256_MASK,
29697   IX86_BUILTIN_PSUBQ256_MASK,
29698   IX86_BUILTIN_PSUBSB256_MASK,
29699   IX86_BUILTIN_PSUBSW256_MASK,
29700   IX86_BUILTIN_PSUBUSB256_MASK,
29701   IX86_BUILTIN_PSUBUSW256_MASK,
29702   IX86_BUILTIN_SHUF_F64x2_256,
29703   IX86_BUILTIN_SHUF_I64x2_256,
29704   IX86_BUILTIN_SHUF_I32x4_256,
29705   IX86_BUILTIN_SHUF_F32x4_256,
29706   IX86_BUILTIN_PMOVWB128,
29707   IX86_BUILTIN_PMOVWB256,
29708   IX86_BUILTIN_PMOVSWB128,
29709   IX86_BUILTIN_PMOVSWB256,
29710   IX86_BUILTIN_PMOVUSWB128,
29711   IX86_BUILTIN_PMOVUSWB256,
29712   IX86_BUILTIN_PMOVDB128,
29713   IX86_BUILTIN_PMOVDB256,
29714   IX86_BUILTIN_PMOVSDB128,
29715   IX86_BUILTIN_PMOVSDB256,
29716   IX86_BUILTIN_PMOVUSDB128,
29717   IX86_BUILTIN_PMOVUSDB256,
29718   IX86_BUILTIN_PMOVDW128,
29719   IX86_BUILTIN_PMOVDW256,
29720   IX86_BUILTIN_PMOVSDW128,
29721   IX86_BUILTIN_PMOVSDW256,
29722   IX86_BUILTIN_PMOVUSDW128,
29723   IX86_BUILTIN_PMOVUSDW256,
29724   IX86_BUILTIN_PMOVQB128,
29725   IX86_BUILTIN_PMOVQB256,
29726   IX86_BUILTIN_PMOVSQB128,
29727   IX86_BUILTIN_PMOVSQB256,
29728   IX86_BUILTIN_PMOVUSQB128,
29729   IX86_BUILTIN_PMOVUSQB256,
29730   IX86_BUILTIN_PMOVQW128,
29731   IX86_BUILTIN_PMOVQW256,
29732   IX86_BUILTIN_PMOVSQW128,
29733   IX86_BUILTIN_PMOVSQW256,
29734   IX86_BUILTIN_PMOVUSQW128,
29735   IX86_BUILTIN_PMOVUSQW256,
29736   IX86_BUILTIN_PMOVQD128,
29737   IX86_BUILTIN_PMOVQD256,
29738   IX86_BUILTIN_PMOVSQD128,
29739   IX86_BUILTIN_PMOVSQD256,
29740   IX86_BUILTIN_PMOVUSQD128,
29741   IX86_BUILTIN_PMOVUSQD256,
29742   IX86_BUILTIN_RANGEPD256,
29743   IX86_BUILTIN_RANGEPD128,
29744   IX86_BUILTIN_RANGEPS256,
29745   IX86_BUILTIN_RANGEPS128,
29746   IX86_BUILTIN_GETEXPPS256,
29747   IX86_BUILTIN_GETEXPPD256,
29748   IX86_BUILTIN_GETEXPPS128,
29749   IX86_BUILTIN_GETEXPPD128,
29750   IX86_BUILTIN_FIXUPIMMPD256_MASK,
29751   IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29752   IX86_BUILTIN_FIXUPIMMPS256_MASK,
29753   IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29754   IX86_BUILTIN_FIXUPIMMPD128_MASK,
29755   IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29756   IX86_BUILTIN_FIXUPIMMPS128_MASK,
29757   IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29758   IX86_BUILTIN_PABSQ256,
29759   IX86_BUILTIN_PABSQ128,
29760   IX86_BUILTIN_PABSD256_MASK,
29761   IX86_BUILTIN_PABSD128_MASK,
29762   IX86_BUILTIN_PMULHRSW256_MASK,
29763   IX86_BUILTIN_PMULHRSW128_MASK,
29764   IX86_BUILTIN_PMULHUW128_MASK,
29765   IX86_BUILTIN_PMULHUW256_MASK,
29766   IX86_BUILTIN_PMULHW256_MASK,
29767   IX86_BUILTIN_PMULHW128_MASK,
29768   IX86_BUILTIN_PMULLW256_MASK,
29769   IX86_BUILTIN_PMULLW128_MASK,
29770   IX86_BUILTIN_PMULLQ256,
29771   IX86_BUILTIN_PMULLQ128,
29772   IX86_BUILTIN_ANDPD256_MASK,
29773   IX86_BUILTIN_ANDPD128_MASK,
29774   IX86_BUILTIN_ANDPS256_MASK,
29775   IX86_BUILTIN_ANDPS128_MASK,
29776   IX86_BUILTIN_ANDNPD256_MASK,
29777   IX86_BUILTIN_ANDNPD128_MASK,
29778   IX86_BUILTIN_ANDNPS256_MASK,
29779   IX86_BUILTIN_ANDNPS128_MASK,
29780   IX86_BUILTIN_PSLLWI128_MASK,
29781   IX86_BUILTIN_PSLLDI128_MASK,
29782   IX86_BUILTIN_PSLLQI128_MASK,
29783   IX86_BUILTIN_PSLLW128_MASK,
29784   IX86_BUILTIN_PSLLD128_MASK,
29785   IX86_BUILTIN_PSLLQ128_MASK,
29786   IX86_BUILTIN_PSLLWI256_MASK,
29787   IX86_BUILTIN_PSLLW256_MASK,
29788   IX86_BUILTIN_PSLLDI256_MASK,
29789   IX86_BUILTIN_PSLLD256_MASK,
29790   IX86_BUILTIN_PSLLQI256_MASK,
29791   IX86_BUILTIN_PSLLQ256_MASK,
29792   IX86_BUILTIN_PSRADI128_MASK,
29793   IX86_BUILTIN_PSRAD128_MASK,
29794   IX86_BUILTIN_PSRADI256_MASK,
29795   IX86_BUILTIN_PSRAD256_MASK,
29796   IX86_BUILTIN_PSRAQI128_MASK,
29797   IX86_BUILTIN_PSRAQ128_MASK,
29798   IX86_BUILTIN_PSRAQI256_MASK,
29799   IX86_BUILTIN_PSRAQ256_MASK,
29800   IX86_BUILTIN_PANDD256,
29801   IX86_BUILTIN_PANDD128,
29802   IX86_BUILTIN_PSRLDI128_MASK,
29803   IX86_BUILTIN_PSRLD128_MASK,
29804   IX86_BUILTIN_PSRLDI256_MASK,
29805   IX86_BUILTIN_PSRLD256_MASK,
29806   IX86_BUILTIN_PSRLQI128_MASK,
29807   IX86_BUILTIN_PSRLQ128_MASK,
29808   IX86_BUILTIN_PSRLQI256_MASK,
29809   IX86_BUILTIN_PSRLQ256_MASK,
29810   IX86_BUILTIN_PANDQ256,
29811   IX86_BUILTIN_PANDQ128,
29812   IX86_BUILTIN_PANDND256,
29813   IX86_BUILTIN_PANDND128,
29814   IX86_BUILTIN_PANDNQ256,
29815   IX86_BUILTIN_PANDNQ128,
29816   IX86_BUILTIN_PORD256,
29817   IX86_BUILTIN_PORD128,
29818   IX86_BUILTIN_PORQ256,
29819   IX86_BUILTIN_PORQ128,
29820   IX86_BUILTIN_PXORD256,
29821   IX86_BUILTIN_PXORD128,
29822   IX86_BUILTIN_PXORQ256,
29823   IX86_BUILTIN_PXORQ128,
29824   IX86_BUILTIN_PACKSSWB256_MASK,
29825   IX86_BUILTIN_PACKSSWB128_MASK,
29826   IX86_BUILTIN_PACKUSWB256_MASK,
29827   IX86_BUILTIN_PACKUSWB128_MASK,
29828   IX86_BUILTIN_RNDSCALEPS256,
29829   IX86_BUILTIN_RNDSCALEPD256,
29830   IX86_BUILTIN_RNDSCALEPS128,
29831   IX86_BUILTIN_RNDSCALEPD128,
29832   IX86_BUILTIN_VTERNLOGQ256_MASK,
29833   IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29834   IX86_BUILTIN_VTERNLOGD256_MASK,
29835   IX86_BUILTIN_VTERNLOGD256_MASKZ,
29836   IX86_BUILTIN_VTERNLOGQ128_MASK,
29837   IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29838   IX86_BUILTIN_VTERNLOGD128_MASK,
29839   IX86_BUILTIN_VTERNLOGD128_MASKZ,
29840   IX86_BUILTIN_SCALEFPD256,
29841   IX86_BUILTIN_SCALEFPS256,
29842   IX86_BUILTIN_SCALEFPD128,
29843   IX86_BUILTIN_SCALEFPS128,
29844   IX86_BUILTIN_VFMADDPD256_MASK,
29845   IX86_BUILTIN_VFMADDPD256_MASK3,
29846   IX86_BUILTIN_VFMADDPD256_MASKZ,
29847   IX86_BUILTIN_VFMADDPD128_MASK,
29848   IX86_BUILTIN_VFMADDPD128_MASK3,
29849   IX86_BUILTIN_VFMADDPD128_MASKZ,
29850   IX86_BUILTIN_VFMADDPS256_MASK,
29851   IX86_BUILTIN_VFMADDPS256_MASK3,
29852   IX86_BUILTIN_VFMADDPS256_MASKZ,
29853   IX86_BUILTIN_VFMADDPS128_MASK,
29854   IX86_BUILTIN_VFMADDPS128_MASK3,
29855   IX86_BUILTIN_VFMADDPS128_MASKZ,
29856   IX86_BUILTIN_VFMSUBPD256_MASK3,
29857   IX86_BUILTIN_VFMSUBPD128_MASK3,
29858   IX86_BUILTIN_VFMSUBPS256_MASK3,
29859   IX86_BUILTIN_VFMSUBPS128_MASK3,
29860   IX86_BUILTIN_VFNMADDPD256_MASK,
29861   IX86_BUILTIN_VFNMADDPD128_MASK,
29862   IX86_BUILTIN_VFNMADDPS256_MASK,
29863   IX86_BUILTIN_VFNMADDPS128_MASK,
29864   IX86_BUILTIN_VFNMSUBPD256_MASK,
29865   IX86_BUILTIN_VFNMSUBPD256_MASK3,
29866   IX86_BUILTIN_VFNMSUBPD128_MASK,
29867   IX86_BUILTIN_VFNMSUBPD128_MASK3,
29868   IX86_BUILTIN_VFNMSUBPS256_MASK,
29869   IX86_BUILTIN_VFNMSUBPS256_MASK3,
29870   IX86_BUILTIN_VFNMSUBPS128_MASK,
29871   IX86_BUILTIN_VFNMSUBPS128_MASK3,
29872   IX86_BUILTIN_VFMADDSUBPD256_MASK,
29873   IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29874   IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29875   IX86_BUILTIN_VFMADDSUBPD128_MASK,
29876   IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29877   IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29878   IX86_BUILTIN_VFMADDSUBPS256_MASK,
29879   IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29880   IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29881   IX86_BUILTIN_VFMADDSUBPS128_MASK,
29882   IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29883   IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29884   IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29885   IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29886   IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29887   IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29888   IX86_BUILTIN_INSERTF64X2_256,
29889   IX86_BUILTIN_INSERTI64X2_256,
29890   IX86_BUILTIN_PSRAVV16HI,
29891   IX86_BUILTIN_PSRAVV8HI,
29892   IX86_BUILTIN_PMADDUBSW256_MASK,
29893   IX86_BUILTIN_PMADDUBSW128_MASK,
29894   IX86_BUILTIN_PMADDWD256_MASK,
29895   IX86_BUILTIN_PMADDWD128_MASK,
29896   IX86_BUILTIN_PSRLVV16HI,
29897   IX86_BUILTIN_PSRLVV8HI,
29898   IX86_BUILTIN_CVTPS2DQ256_MASK,
29899   IX86_BUILTIN_CVTPS2DQ128_MASK,
29900   IX86_BUILTIN_CVTPS2UDQ256,
29901   IX86_BUILTIN_CVTPS2UDQ128,
29902   IX86_BUILTIN_CVTPS2QQ256,
29903   IX86_BUILTIN_CVTPS2QQ128,
29904   IX86_BUILTIN_CVTPS2UQQ256,
29905   IX86_BUILTIN_CVTPS2UQQ128,
29906   IX86_BUILTIN_GETMANTPS256,
29907   IX86_BUILTIN_GETMANTPS128,
29908   IX86_BUILTIN_GETMANTPD256,
29909   IX86_BUILTIN_GETMANTPD128,
29910   IX86_BUILTIN_MOVDDUP256_MASK,
29911   IX86_BUILTIN_MOVDDUP128_MASK,
29912   IX86_BUILTIN_MOVSHDUP256_MASK,
29913   IX86_BUILTIN_MOVSHDUP128_MASK,
29914   IX86_BUILTIN_MOVSLDUP256_MASK,
29915   IX86_BUILTIN_MOVSLDUP128_MASK,
29916   IX86_BUILTIN_CVTQQ2PS256,
29917   IX86_BUILTIN_CVTQQ2PS128,
29918   IX86_BUILTIN_CVTUQQ2PS256,
29919   IX86_BUILTIN_CVTUQQ2PS128,
29920   IX86_BUILTIN_CVTQQ2PD256,
29921   IX86_BUILTIN_CVTQQ2PD128,
29922   IX86_BUILTIN_CVTUQQ2PD256,
29923   IX86_BUILTIN_CVTUQQ2PD128,
29924   IX86_BUILTIN_VPERMT2VARQ256,
29925   IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29926   IX86_BUILTIN_VPERMT2VARD256,
29927   IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29928   IX86_BUILTIN_VPERMI2VARQ256,
29929   IX86_BUILTIN_VPERMI2VARD256,
29930   IX86_BUILTIN_VPERMT2VARPD256,
29931   IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29932   IX86_BUILTIN_VPERMT2VARPS256,
29933   IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29934   IX86_BUILTIN_VPERMI2VARPD256,
29935   IX86_BUILTIN_VPERMI2VARPS256,
29936   IX86_BUILTIN_VPERMT2VARQ128,
29937   IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29938   IX86_BUILTIN_VPERMT2VARD128,
29939   IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29940   IX86_BUILTIN_VPERMI2VARQ128,
29941   IX86_BUILTIN_VPERMI2VARD128,
29942   IX86_BUILTIN_VPERMT2VARPD128,
29943   IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29944   IX86_BUILTIN_VPERMT2VARPS128,
29945   IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29946   IX86_BUILTIN_VPERMI2VARPD128,
29947   IX86_BUILTIN_VPERMI2VARPS128,
29948   IX86_BUILTIN_PSHUFB256_MASK,
29949   IX86_BUILTIN_PSHUFB128_MASK,
29950   IX86_BUILTIN_PSHUFHW256_MASK,
29951   IX86_BUILTIN_PSHUFHW128_MASK,
29952   IX86_BUILTIN_PSHUFLW256_MASK,
29953   IX86_BUILTIN_PSHUFLW128_MASK,
29954   IX86_BUILTIN_PSHUFD256_MASK,
29955   IX86_BUILTIN_PSHUFD128_MASK,
29956   IX86_BUILTIN_SHUFPD256_MASK,
29957   IX86_BUILTIN_SHUFPD128_MASK,
29958   IX86_BUILTIN_SHUFPS256_MASK,
29959   IX86_BUILTIN_SHUFPS128_MASK,
29960   IX86_BUILTIN_PROLVQ256,
29961   IX86_BUILTIN_PROLVQ128,
29962   IX86_BUILTIN_PROLQ256,
29963   IX86_BUILTIN_PROLQ128,
29964   IX86_BUILTIN_PRORVQ256,
29965   IX86_BUILTIN_PRORVQ128,
29966   IX86_BUILTIN_PRORQ256,
29967   IX86_BUILTIN_PRORQ128,
29968   IX86_BUILTIN_PSRAVQ128,
29969   IX86_BUILTIN_PSRAVQ256,
29970   IX86_BUILTIN_PSLLVV4DI_MASK,
29971   IX86_BUILTIN_PSLLVV2DI_MASK,
29972   IX86_BUILTIN_PSLLVV8SI_MASK,
29973   IX86_BUILTIN_PSLLVV4SI_MASK,
29974   IX86_BUILTIN_PSRAVV8SI_MASK,
29975   IX86_BUILTIN_PSRAVV4SI_MASK,
29976   IX86_BUILTIN_PSRLVV4DI_MASK,
29977   IX86_BUILTIN_PSRLVV2DI_MASK,
29978   IX86_BUILTIN_PSRLVV8SI_MASK,
29979   IX86_BUILTIN_PSRLVV4SI_MASK,
29980   IX86_BUILTIN_PSRAWI256_MASK,
29981   IX86_BUILTIN_PSRAW256_MASK,
29982   IX86_BUILTIN_PSRAWI128_MASK,
29983   IX86_BUILTIN_PSRAW128_MASK,
29984   IX86_BUILTIN_PSRLWI256_MASK,
29985   IX86_BUILTIN_PSRLW256_MASK,
29986   IX86_BUILTIN_PSRLWI128_MASK,
29987   IX86_BUILTIN_PSRLW128_MASK,
29988   IX86_BUILTIN_PRORVD256,
29989   IX86_BUILTIN_PROLVD256,
29990   IX86_BUILTIN_PRORD256,
29991   IX86_BUILTIN_PROLD256,
29992   IX86_BUILTIN_PRORVD128,
29993   IX86_BUILTIN_PROLVD128,
29994   IX86_BUILTIN_PRORD128,
29995   IX86_BUILTIN_PROLD128,
29996   IX86_BUILTIN_FPCLASSPD256,
29997   IX86_BUILTIN_FPCLASSPD128,
29998   IX86_BUILTIN_FPCLASSSD,
29999   IX86_BUILTIN_FPCLASSPS256,
30000   IX86_BUILTIN_FPCLASSPS128,
30001   IX86_BUILTIN_FPCLASSSS,
30002   IX86_BUILTIN_CVTB2MASK128,
30003   IX86_BUILTIN_CVTB2MASK256,
30004   IX86_BUILTIN_CVTW2MASK128,
30005   IX86_BUILTIN_CVTW2MASK256,
30006   IX86_BUILTIN_CVTD2MASK128,
30007   IX86_BUILTIN_CVTD2MASK256,
30008   IX86_BUILTIN_CVTQ2MASK128,
30009   IX86_BUILTIN_CVTQ2MASK256,
30010   IX86_BUILTIN_CVTMASK2B128,
30011   IX86_BUILTIN_CVTMASK2B256,
30012   IX86_BUILTIN_CVTMASK2W128,
30013   IX86_BUILTIN_CVTMASK2W256,
30014   IX86_BUILTIN_CVTMASK2D128,
30015   IX86_BUILTIN_CVTMASK2D256,
30016   IX86_BUILTIN_CVTMASK2Q128,
30017   IX86_BUILTIN_CVTMASK2Q256,
30018   IX86_BUILTIN_PCMPEQB128_MASK,
30019   IX86_BUILTIN_PCMPEQB256_MASK,
30020   IX86_BUILTIN_PCMPEQW128_MASK,
30021   IX86_BUILTIN_PCMPEQW256_MASK,
30022   IX86_BUILTIN_PCMPEQD128_MASK,
30023   IX86_BUILTIN_PCMPEQD256_MASK,
30024   IX86_BUILTIN_PCMPEQQ128_MASK,
30025   IX86_BUILTIN_PCMPEQQ256_MASK,
30026   IX86_BUILTIN_PCMPGTB128_MASK,
30027   IX86_BUILTIN_PCMPGTB256_MASK,
30028   IX86_BUILTIN_PCMPGTW128_MASK,
30029   IX86_BUILTIN_PCMPGTW256_MASK,
30030   IX86_BUILTIN_PCMPGTD128_MASK,
30031   IX86_BUILTIN_PCMPGTD256_MASK,
30032   IX86_BUILTIN_PCMPGTQ128_MASK,
30033   IX86_BUILTIN_PCMPGTQ256_MASK,
30034   IX86_BUILTIN_PTESTMB128,
30035   IX86_BUILTIN_PTESTMB256,
30036   IX86_BUILTIN_PTESTMW128,
30037   IX86_BUILTIN_PTESTMW256,
30038   IX86_BUILTIN_PTESTMD128,
30039   IX86_BUILTIN_PTESTMD256,
30040   IX86_BUILTIN_PTESTMQ128,
30041   IX86_BUILTIN_PTESTMQ256,
30042   IX86_BUILTIN_PTESTNMB128,
30043   IX86_BUILTIN_PTESTNMB256,
30044   IX86_BUILTIN_PTESTNMW128,
30045   IX86_BUILTIN_PTESTNMW256,
30046   IX86_BUILTIN_PTESTNMD128,
30047   IX86_BUILTIN_PTESTNMD256,
30048   IX86_BUILTIN_PTESTNMQ128,
30049   IX86_BUILTIN_PTESTNMQ256,
30050   IX86_BUILTIN_PBROADCASTMB128,
30051   IX86_BUILTIN_PBROADCASTMB256,
30052   IX86_BUILTIN_PBROADCASTMW128,
30053   IX86_BUILTIN_PBROADCASTMW256,
30054   IX86_BUILTIN_COMPRESSPD256,
30055   IX86_BUILTIN_COMPRESSPD128,
30056   IX86_BUILTIN_COMPRESSPS256,
30057   IX86_BUILTIN_COMPRESSPS128,
30058   IX86_BUILTIN_PCOMPRESSQ256,
30059   IX86_BUILTIN_PCOMPRESSQ128,
30060   IX86_BUILTIN_PCOMPRESSD256,
30061   IX86_BUILTIN_PCOMPRESSD128,
30062   IX86_BUILTIN_EXPANDPD256,
30063   IX86_BUILTIN_EXPANDPD128,
30064   IX86_BUILTIN_EXPANDPS256,
30065   IX86_BUILTIN_EXPANDPS128,
30066   IX86_BUILTIN_PEXPANDQ256,
30067   IX86_BUILTIN_PEXPANDQ128,
30068   IX86_BUILTIN_PEXPANDD256,
30069   IX86_BUILTIN_PEXPANDD128,
30070   IX86_BUILTIN_EXPANDPD256Z,
30071   IX86_BUILTIN_EXPANDPD128Z,
30072   IX86_BUILTIN_EXPANDPS256Z,
30073   IX86_BUILTIN_EXPANDPS128Z,
30074   IX86_BUILTIN_PEXPANDQ256Z,
30075   IX86_BUILTIN_PEXPANDQ128Z,
30076   IX86_BUILTIN_PEXPANDD256Z,
30077   IX86_BUILTIN_PEXPANDD128Z,
30078   IX86_BUILTIN_PMAXSD256_MASK,
30079   IX86_BUILTIN_PMINSD256_MASK,
30080   IX86_BUILTIN_PMAXUD256_MASK,
30081   IX86_BUILTIN_PMINUD256_MASK,
30082   IX86_BUILTIN_PMAXSD128_MASK,
30083   IX86_BUILTIN_PMINSD128_MASK,
30084   IX86_BUILTIN_PMAXUD128_MASK,
30085   IX86_BUILTIN_PMINUD128_MASK,
30086   IX86_BUILTIN_PMAXSQ256_MASK,
30087   IX86_BUILTIN_PMINSQ256_MASK,
30088   IX86_BUILTIN_PMAXUQ256_MASK,
30089   IX86_BUILTIN_PMINUQ256_MASK,
30090   IX86_BUILTIN_PMAXSQ128_MASK,
30091   IX86_BUILTIN_PMINSQ128_MASK,
30092   IX86_BUILTIN_PMAXUQ128_MASK,
30093   IX86_BUILTIN_PMINUQ128_MASK,
30094   IX86_BUILTIN_PMINSB256_MASK,
30095   IX86_BUILTIN_PMINUB256_MASK,
30096   IX86_BUILTIN_PMAXSB256_MASK,
30097   IX86_BUILTIN_PMAXUB256_MASK,
30098   IX86_BUILTIN_PMINSB128_MASK,
30099   IX86_BUILTIN_PMINUB128_MASK,
30100   IX86_BUILTIN_PMAXSB128_MASK,
30101   IX86_BUILTIN_PMAXUB128_MASK,
30102   IX86_BUILTIN_PMINSW256_MASK,
30103   IX86_BUILTIN_PMINUW256_MASK,
30104   IX86_BUILTIN_PMAXSW256_MASK,
30105   IX86_BUILTIN_PMAXUW256_MASK,
30106   IX86_BUILTIN_PMINSW128_MASK,
30107   IX86_BUILTIN_PMINUW128_MASK,
30108   IX86_BUILTIN_PMAXSW128_MASK,
30109   IX86_BUILTIN_PMAXUW128_MASK,
30110   IX86_BUILTIN_VPCONFLICTQ256,
30111   IX86_BUILTIN_VPCONFLICTD256,
30112   IX86_BUILTIN_VPCLZCNTQ256,
30113   IX86_BUILTIN_VPCLZCNTD256,
30114   IX86_BUILTIN_UNPCKHPD256_MASK,
30115   IX86_BUILTIN_UNPCKHPD128_MASK,
30116   IX86_BUILTIN_UNPCKHPS256_MASK,
30117   IX86_BUILTIN_UNPCKHPS128_MASK,
30118   IX86_BUILTIN_UNPCKLPD256_MASK,
30119   IX86_BUILTIN_UNPCKLPD128_MASK,
30120   IX86_BUILTIN_UNPCKLPS256_MASK,
30121   IX86_BUILTIN_VPCONFLICTQ128,
30122   IX86_BUILTIN_VPCONFLICTD128,
30123   IX86_BUILTIN_VPCLZCNTQ128,
30124   IX86_BUILTIN_VPCLZCNTD128,
30125   IX86_BUILTIN_UNPCKLPS128_MASK,
30126   IX86_BUILTIN_ALIGND256,
30127   IX86_BUILTIN_ALIGNQ256,
30128   IX86_BUILTIN_ALIGND128,
30129   IX86_BUILTIN_ALIGNQ128,
30130   IX86_BUILTIN_CVTPS2PH256_MASK,
30131   IX86_BUILTIN_CVTPS2PH_MASK,
30132   IX86_BUILTIN_CVTPH2PS_MASK,
30133   IX86_BUILTIN_CVTPH2PS256_MASK,
30134   IX86_BUILTIN_PUNPCKHDQ128_MASK,
30135   IX86_BUILTIN_PUNPCKHDQ256_MASK,
30136   IX86_BUILTIN_PUNPCKHQDQ128_MASK,
30137   IX86_BUILTIN_PUNPCKHQDQ256_MASK,
30138   IX86_BUILTIN_PUNPCKLDQ128_MASK,
30139   IX86_BUILTIN_PUNPCKLDQ256_MASK,
30140   IX86_BUILTIN_PUNPCKLQDQ128_MASK,
30141   IX86_BUILTIN_PUNPCKLQDQ256_MASK,
30142   IX86_BUILTIN_PUNPCKHBW128_MASK,
30143   IX86_BUILTIN_PUNPCKHBW256_MASK,
30144   IX86_BUILTIN_PUNPCKHWD128_MASK,
30145   IX86_BUILTIN_PUNPCKHWD256_MASK,
30146   IX86_BUILTIN_PUNPCKLBW128_MASK,
30147   IX86_BUILTIN_PUNPCKLBW256_MASK,
30148   IX86_BUILTIN_PUNPCKLWD128_MASK,
30149   IX86_BUILTIN_PUNPCKLWD256_MASK,
30150   IX86_BUILTIN_PSLLVV16HI,
30151   IX86_BUILTIN_PSLLVV8HI,
30152   IX86_BUILTIN_PACKSSDW256_MASK,
30153   IX86_BUILTIN_PACKSSDW128_MASK,
30154   IX86_BUILTIN_PACKUSDW256_MASK,
30155   IX86_BUILTIN_PACKUSDW128_MASK,
30156   IX86_BUILTIN_PAVGB256_MASK,
30157   IX86_BUILTIN_PAVGW256_MASK,
30158   IX86_BUILTIN_PAVGB128_MASK,
30159   IX86_BUILTIN_PAVGW128_MASK,
30160   IX86_BUILTIN_VPERMVARSF256_MASK,
30161   IX86_BUILTIN_VPERMVARDF256_MASK,
30162   IX86_BUILTIN_VPERMDF256_MASK,
30163   IX86_BUILTIN_PABSB256_MASK,
30164   IX86_BUILTIN_PABSB128_MASK,
30165   IX86_BUILTIN_PABSW256_MASK,
30166   IX86_BUILTIN_PABSW128_MASK,
30167   IX86_BUILTIN_VPERMILVARPD_MASK,
30168   IX86_BUILTIN_VPERMILVARPS_MASK,
30169   IX86_BUILTIN_VPERMILVARPD256_MASK,
30170   IX86_BUILTIN_VPERMILVARPS256_MASK,
30171   IX86_BUILTIN_VPERMILPD_MASK,
30172   IX86_BUILTIN_VPERMILPS_MASK,
30173   IX86_BUILTIN_VPERMILPD256_MASK,
30174   IX86_BUILTIN_VPERMILPS256_MASK,
30175   IX86_BUILTIN_BLENDMQ256,
30176   IX86_BUILTIN_BLENDMD256,
30177   IX86_BUILTIN_BLENDMPD256,
30178   IX86_BUILTIN_BLENDMPS256,
30179   IX86_BUILTIN_BLENDMQ128,
30180   IX86_BUILTIN_BLENDMD128,
30181   IX86_BUILTIN_BLENDMPD128,
30182   IX86_BUILTIN_BLENDMPS128,
30183   IX86_BUILTIN_BLENDMW256,
30184   IX86_BUILTIN_BLENDMB256,
30185   IX86_BUILTIN_BLENDMW128,
30186   IX86_BUILTIN_BLENDMB128,
30187   IX86_BUILTIN_PMULLD256_MASK,
30188   IX86_BUILTIN_PMULLD128_MASK,
30189   IX86_BUILTIN_PMULUDQ256_MASK,
30190   IX86_BUILTIN_PMULDQ256_MASK,
30191   IX86_BUILTIN_PMULDQ128_MASK,
30192   IX86_BUILTIN_PMULUDQ128_MASK,
30193   IX86_BUILTIN_CVTPD2PS256_MASK,
30194   IX86_BUILTIN_CVTPD2PS_MASK,
30195   IX86_BUILTIN_VPERMVARSI256_MASK,
30196   IX86_BUILTIN_VPERMVARDI256_MASK,
30197   IX86_BUILTIN_VPERMDI256_MASK,
30198   IX86_BUILTIN_CMPQ256,
30199   IX86_BUILTIN_CMPD256,
30200   IX86_BUILTIN_UCMPQ256,
30201   IX86_BUILTIN_UCMPD256,
30202   IX86_BUILTIN_CMPB256,
30203   IX86_BUILTIN_CMPW256,
30204   IX86_BUILTIN_UCMPB256,
30205   IX86_BUILTIN_UCMPW256,
30206   IX86_BUILTIN_CMPPD256_MASK,
30207   IX86_BUILTIN_CMPPS256_MASK,
30208   IX86_BUILTIN_CMPQ128,
30209   IX86_BUILTIN_CMPD128,
30210   IX86_BUILTIN_UCMPQ128,
30211   IX86_BUILTIN_UCMPD128,
30212   IX86_BUILTIN_CMPB128,
30213   IX86_BUILTIN_CMPW128,
30214   IX86_BUILTIN_UCMPB128,
30215   IX86_BUILTIN_UCMPW128,
30216   IX86_BUILTIN_CMPPD128_MASK,
30217   IX86_BUILTIN_CMPPS128_MASK,
30218 
30219   IX86_BUILTIN_GATHER3SIV8SF,
30220   IX86_BUILTIN_GATHER3SIV4SF,
30221   IX86_BUILTIN_GATHER3SIV4DF,
30222   IX86_BUILTIN_GATHER3SIV2DF,
30223   IX86_BUILTIN_GATHER3DIV8SF,
30224   IX86_BUILTIN_GATHER3DIV4SF,
30225   IX86_BUILTIN_GATHER3DIV4DF,
30226   IX86_BUILTIN_GATHER3DIV2DF,
30227   IX86_BUILTIN_GATHER3SIV8SI,
30228   IX86_BUILTIN_GATHER3SIV4SI,
30229   IX86_BUILTIN_GATHER3SIV4DI,
30230   IX86_BUILTIN_GATHER3SIV2DI,
30231   IX86_BUILTIN_GATHER3DIV8SI,
30232   IX86_BUILTIN_GATHER3DIV4SI,
30233   IX86_BUILTIN_GATHER3DIV4DI,
30234   IX86_BUILTIN_GATHER3DIV2DI,
30235   IX86_BUILTIN_SCATTERSIV8SF,
30236   IX86_BUILTIN_SCATTERSIV4SF,
30237   IX86_BUILTIN_SCATTERSIV4DF,
30238   IX86_BUILTIN_SCATTERSIV2DF,
30239   IX86_BUILTIN_SCATTERDIV8SF,
30240   IX86_BUILTIN_SCATTERDIV4SF,
30241   IX86_BUILTIN_SCATTERDIV4DF,
30242   IX86_BUILTIN_SCATTERDIV2DF,
30243   IX86_BUILTIN_SCATTERSIV8SI,
30244   IX86_BUILTIN_SCATTERSIV4SI,
30245   IX86_BUILTIN_SCATTERSIV4DI,
30246   IX86_BUILTIN_SCATTERSIV2DI,
30247   IX86_BUILTIN_SCATTERDIV8SI,
30248   IX86_BUILTIN_SCATTERDIV4SI,
30249   IX86_BUILTIN_SCATTERDIV4DI,
30250   IX86_BUILTIN_SCATTERDIV2DI,
30251 
30252   /* AVX512DQ.  */
30253   IX86_BUILTIN_RANGESD128,
30254   IX86_BUILTIN_RANGESS128,
30255   IX86_BUILTIN_KUNPCKWD,
30256   IX86_BUILTIN_KUNPCKDQ,
30257   IX86_BUILTIN_BROADCASTF32x2_512,
30258   IX86_BUILTIN_BROADCASTI32x2_512,
30259   IX86_BUILTIN_BROADCASTF64X2_512,
30260   IX86_BUILTIN_BROADCASTI64X2_512,
30261   IX86_BUILTIN_BROADCASTF32X8_512,
30262   IX86_BUILTIN_BROADCASTI32X8_512,
30263   IX86_BUILTIN_EXTRACTF64X2_512,
30264   IX86_BUILTIN_EXTRACTF32X8,
30265   IX86_BUILTIN_EXTRACTI64X2_512,
30266   IX86_BUILTIN_EXTRACTI32X8,
30267   IX86_BUILTIN_REDUCEPD512_MASK,
30268   IX86_BUILTIN_REDUCEPS512_MASK,
30269   IX86_BUILTIN_PMULLQ512,
30270   IX86_BUILTIN_XORPD512,
30271   IX86_BUILTIN_XORPS512,
30272   IX86_BUILTIN_ORPD512,
30273   IX86_BUILTIN_ORPS512,
30274   IX86_BUILTIN_ANDPD512,
30275   IX86_BUILTIN_ANDPS512,
30276   IX86_BUILTIN_ANDNPD512,
30277   IX86_BUILTIN_ANDNPS512,
30278   IX86_BUILTIN_INSERTF32X8,
30279   IX86_BUILTIN_INSERTI32X8,
30280   IX86_BUILTIN_INSERTF64X2_512,
30281   IX86_BUILTIN_INSERTI64X2_512,
30282   IX86_BUILTIN_FPCLASSPD512,
30283   IX86_BUILTIN_FPCLASSPS512,
30284   IX86_BUILTIN_CVTD2MASK512,
30285   IX86_BUILTIN_CVTQ2MASK512,
30286   IX86_BUILTIN_CVTMASK2D512,
30287   IX86_BUILTIN_CVTMASK2Q512,
30288   IX86_BUILTIN_CVTPD2QQ512,
30289   IX86_BUILTIN_CVTPS2QQ512,
30290   IX86_BUILTIN_CVTPD2UQQ512,
30291   IX86_BUILTIN_CVTPS2UQQ512,
30292   IX86_BUILTIN_CVTQQ2PS512,
30293   IX86_BUILTIN_CVTUQQ2PS512,
30294   IX86_BUILTIN_CVTQQ2PD512,
30295   IX86_BUILTIN_CVTUQQ2PD512,
30296   IX86_BUILTIN_CVTTPS2QQ512,
30297   IX86_BUILTIN_CVTTPS2UQQ512,
30298   IX86_BUILTIN_CVTTPD2QQ512,
30299   IX86_BUILTIN_CVTTPD2UQQ512,
30300   IX86_BUILTIN_RANGEPS512,
30301   IX86_BUILTIN_RANGEPD512,
30302 
30303   /* AVX512BW.  */
30304   IX86_BUILTIN_PACKUSDW512,
30305   IX86_BUILTIN_PACKSSDW512,
30306   IX86_BUILTIN_LOADDQUHI512_MASK,
30307   IX86_BUILTIN_LOADDQUQI512_MASK,
30308   IX86_BUILTIN_PSLLDQ512,
30309   IX86_BUILTIN_PSRLDQ512,
30310   IX86_BUILTIN_STOREDQUHI512_MASK,
30311   IX86_BUILTIN_STOREDQUQI512_MASK,
30312   IX86_BUILTIN_PALIGNR512,
30313   IX86_BUILTIN_PALIGNR512_MASK,
30314   IX86_BUILTIN_MOVDQUHI512_MASK,
30315   IX86_BUILTIN_MOVDQUQI512_MASK,
30316   IX86_BUILTIN_PSADBW512,
30317   IX86_BUILTIN_DBPSADBW512,
30318   IX86_BUILTIN_PBROADCASTB512,
30319   IX86_BUILTIN_PBROADCASTB512_GPR,
30320   IX86_BUILTIN_PBROADCASTW512,
30321   IX86_BUILTIN_PBROADCASTW512_GPR,
30322   IX86_BUILTIN_PMOVSXBW512_MASK,
30323   IX86_BUILTIN_PMOVZXBW512_MASK,
30324   IX86_BUILTIN_VPERMVARHI512_MASK,
30325   IX86_BUILTIN_VPERMT2VARHI512,
30326   IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
30327   IX86_BUILTIN_VPERMI2VARHI512,
30328   IX86_BUILTIN_PAVGB512,
30329   IX86_BUILTIN_PAVGW512,
30330   IX86_BUILTIN_PADDB512,
30331   IX86_BUILTIN_PSUBB512,
30332   IX86_BUILTIN_PSUBSB512,
30333   IX86_BUILTIN_PADDSB512,
30334   IX86_BUILTIN_PSUBUSB512,
30335   IX86_BUILTIN_PADDUSB512,
30336   IX86_BUILTIN_PSUBW512,
30337   IX86_BUILTIN_PADDW512,
30338   IX86_BUILTIN_PSUBSW512,
30339   IX86_BUILTIN_PADDSW512,
30340   IX86_BUILTIN_PSUBUSW512,
30341   IX86_BUILTIN_PADDUSW512,
30342   IX86_BUILTIN_PMAXUW512,
30343   IX86_BUILTIN_PMAXSW512,
30344   IX86_BUILTIN_PMINUW512,
30345   IX86_BUILTIN_PMINSW512,
30346   IX86_BUILTIN_PMAXUB512,
30347   IX86_BUILTIN_PMAXSB512,
30348   IX86_BUILTIN_PMINUB512,
30349   IX86_BUILTIN_PMINSB512,
30350   IX86_BUILTIN_PMOVWB512,
30351   IX86_BUILTIN_PMOVSWB512,
30352   IX86_BUILTIN_PMOVUSWB512,
30353   IX86_BUILTIN_PMULHRSW512_MASK,
30354   IX86_BUILTIN_PMULHUW512_MASK,
30355   IX86_BUILTIN_PMULHW512_MASK,
30356   IX86_BUILTIN_PMULLW512_MASK,
30357   IX86_BUILTIN_PSLLWI512_MASK,
30358   IX86_BUILTIN_PSLLW512_MASK,
30359   IX86_BUILTIN_PACKSSWB512,
30360   IX86_BUILTIN_PACKUSWB512,
30361   IX86_BUILTIN_PSRAVV32HI,
30362   IX86_BUILTIN_PMADDUBSW512_MASK,
30363   IX86_BUILTIN_PMADDWD512_MASK,
30364   IX86_BUILTIN_PSRLVV32HI,
30365   IX86_BUILTIN_PUNPCKHBW512,
30366   IX86_BUILTIN_PUNPCKHWD512,
30367   IX86_BUILTIN_PUNPCKLBW512,
30368   IX86_BUILTIN_PUNPCKLWD512,
30369   IX86_BUILTIN_PSHUFB512,
30370   IX86_BUILTIN_PSHUFHW512,
30371   IX86_BUILTIN_PSHUFLW512,
30372   IX86_BUILTIN_PSRAWI512,
30373   IX86_BUILTIN_PSRAW512,
30374   IX86_BUILTIN_PSRLWI512,
30375   IX86_BUILTIN_PSRLW512,
30376   IX86_BUILTIN_CVTB2MASK512,
30377   IX86_BUILTIN_CVTW2MASK512,
30378   IX86_BUILTIN_CVTMASK2B512,
30379   IX86_BUILTIN_CVTMASK2W512,
30380   IX86_BUILTIN_PCMPEQB512_MASK,
30381   IX86_BUILTIN_PCMPEQW512_MASK,
30382   IX86_BUILTIN_PCMPGTB512_MASK,
30383   IX86_BUILTIN_PCMPGTW512_MASK,
30384   IX86_BUILTIN_PTESTMB512,
30385   IX86_BUILTIN_PTESTMW512,
30386   IX86_BUILTIN_PTESTNMB512,
30387   IX86_BUILTIN_PTESTNMW512,
30388   IX86_BUILTIN_PSLLVV32HI,
30389   IX86_BUILTIN_PABSB512,
30390   IX86_BUILTIN_PABSW512,
30391   IX86_BUILTIN_BLENDMW512,
30392   IX86_BUILTIN_BLENDMB512,
30393   IX86_BUILTIN_CMPB512,
30394   IX86_BUILTIN_CMPW512,
30395   IX86_BUILTIN_UCMPB512,
30396   IX86_BUILTIN_UCMPW512,
30397 
30398   /* Alternate 4- and 8-element gather/scatter for the vectorizer
30399      where all operands are 32-byte or 64-byte wide, respectively.  */
30400   IX86_BUILTIN_GATHERALTSIV4DF,
30401   IX86_BUILTIN_GATHERALTDIV8SF,
30402   IX86_BUILTIN_GATHERALTSIV4DI,
30403   IX86_BUILTIN_GATHERALTDIV8SI,
30404   IX86_BUILTIN_GATHER3ALTDIV16SF,
30405   IX86_BUILTIN_GATHER3ALTDIV16SI,
30406   IX86_BUILTIN_GATHER3ALTSIV4DF,
30407   IX86_BUILTIN_GATHER3ALTDIV8SF,
30408   IX86_BUILTIN_GATHER3ALTSIV4DI,
30409   IX86_BUILTIN_GATHER3ALTDIV8SI,
30410   IX86_BUILTIN_GATHER3ALTSIV8DF,
30411   IX86_BUILTIN_GATHER3ALTSIV8DI,
30412   IX86_BUILTIN_GATHER3DIV16SF,
30413   IX86_BUILTIN_GATHER3DIV16SI,
30414   IX86_BUILTIN_GATHER3DIV8DF,
30415   IX86_BUILTIN_GATHER3DIV8DI,
30416   IX86_BUILTIN_GATHER3SIV16SF,
30417   IX86_BUILTIN_GATHER3SIV16SI,
30418   IX86_BUILTIN_GATHER3SIV8DF,
30419   IX86_BUILTIN_GATHER3SIV8DI,
30420   IX86_BUILTIN_SCATTERDIV16SF,
30421   IX86_BUILTIN_SCATTERDIV16SI,
30422   IX86_BUILTIN_SCATTERDIV8DF,
30423   IX86_BUILTIN_SCATTERDIV8DI,
30424   IX86_BUILTIN_SCATTERSIV16SF,
30425   IX86_BUILTIN_SCATTERSIV16SI,
30426   IX86_BUILTIN_SCATTERSIV8DF,
30427   IX86_BUILTIN_SCATTERSIV8DI,
30428 
30429   /* AVX512PF */
30430   IX86_BUILTIN_GATHERPFQPD,
30431   IX86_BUILTIN_GATHERPFDPS,
30432   IX86_BUILTIN_GATHERPFDPD,
30433   IX86_BUILTIN_GATHERPFQPS,
30434   IX86_BUILTIN_SCATTERPFDPD,
30435   IX86_BUILTIN_SCATTERPFDPS,
30436   IX86_BUILTIN_SCATTERPFQPD,
30437   IX86_BUILTIN_SCATTERPFQPS,
30438 
30439   /* AVX-512ER */
30440   IX86_BUILTIN_EXP2PD_MASK,
30441   IX86_BUILTIN_EXP2PS_MASK,
30442   IX86_BUILTIN_EXP2PS,
30443   IX86_BUILTIN_RCP28PD,
30444   IX86_BUILTIN_RCP28PS,
30445   IX86_BUILTIN_RCP28SD,
30446   IX86_BUILTIN_RCP28SS,
30447   IX86_BUILTIN_RSQRT28PD,
30448   IX86_BUILTIN_RSQRT28PS,
30449   IX86_BUILTIN_RSQRT28SD,
30450   IX86_BUILTIN_RSQRT28SS,
30451 
30452   /* AVX-512IFMA */
30453   IX86_BUILTIN_VPMADD52LUQ512,
30454   IX86_BUILTIN_VPMADD52HUQ512,
30455   IX86_BUILTIN_VPMADD52LUQ256,
30456   IX86_BUILTIN_VPMADD52HUQ256,
30457   IX86_BUILTIN_VPMADD52LUQ128,
30458   IX86_BUILTIN_VPMADD52HUQ128,
30459   IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30460   IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30461   IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30462   IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30463   IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30464   IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30465 
30466   /* AVX-512VBMI */
30467   IX86_BUILTIN_VPMULTISHIFTQB512,
30468   IX86_BUILTIN_VPMULTISHIFTQB256,
30469   IX86_BUILTIN_VPMULTISHIFTQB128,
30470   IX86_BUILTIN_VPERMVARQI512_MASK,
30471   IX86_BUILTIN_VPERMT2VARQI512,
30472   IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30473   IX86_BUILTIN_VPERMI2VARQI512,
30474   IX86_BUILTIN_VPERMVARQI256_MASK,
30475   IX86_BUILTIN_VPERMVARQI128_MASK,
30476   IX86_BUILTIN_VPERMT2VARQI256,
30477   IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30478   IX86_BUILTIN_VPERMT2VARQI128,
30479   IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30480   IX86_BUILTIN_VPERMI2VARQI256,
30481   IX86_BUILTIN_VPERMI2VARQI128,
30482 
30483   /* SHA builtins.  */
30484   IX86_BUILTIN_SHA1MSG1,
30485   IX86_BUILTIN_SHA1MSG2,
30486   IX86_BUILTIN_SHA1NEXTE,
30487   IX86_BUILTIN_SHA1RNDS4,
30488   IX86_BUILTIN_SHA256MSG1,
30489   IX86_BUILTIN_SHA256MSG2,
30490   IX86_BUILTIN_SHA256RNDS2,
30491 
30492   /* CLWB instructions.  */
30493   IX86_BUILTIN_CLWB,
30494 
30495   /* PCOMMIT instructions.  */
30496   IX86_BUILTIN_PCOMMIT,
30497 
30498   /* CLFLUSHOPT instructions.  */
30499   IX86_BUILTIN_CLFLUSHOPT,
30500 
30501   /* TFmode support builtins.  */
30502   IX86_BUILTIN_INFQ,
30503   IX86_BUILTIN_HUGE_VALQ,
30504   IX86_BUILTIN_FABSQ,
30505   IX86_BUILTIN_COPYSIGNQ,
30506 
30507   /* Vectorizer support builtins.  */
30508   IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30509   IX86_BUILTIN_CPYSGNPS,
30510   IX86_BUILTIN_CPYSGNPD,
30511   IX86_BUILTIN_CPYSGNPS256,
30512   IX86_BUILTIN_CPYSGNPS512,
30513   IX86_BUILTIN_CPYSGNPD256,
30514   IX86_BUILTIN_CPYSGNPD512,
30515   IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30516   IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30517 
30519   /* FMA4 instructions.  */
30520   IX86_BUILTIN_VFMADDSS,
30521   IX86_BUILTIN_VFMADDSD,
30522   IX86_BUILTIN_VFMADDPS,
30523   IX86_BUILTIN_VFMADDPD,
30524   IX86_BUILTIN_VFMADDPS256,
30525   IX86_BUILTIN_VFMADDPD256,
30526   IX86_BUILTIN_VFMADDSUBPS,
30527   IX86_BUILTIN_VFMADDSUBPD,
30528   IX86_BUILTIN_VFMADDSUBPS256,
30529   IX86_BUILTIN_VFMADDSUBPD256,
30530 
30531   /* FMA3 instructions.  */
30532   IX86_BUILTIN_VFMADDSS3,
30533   IX86_BUILTIN_VFMADDSD3,
30534 
30535   /* XOP instructions.  */
30536   IX86_BUILTIN_VPCMOV,
30537   IX86_BUILTIN_VPCMOV_V2DI,
30538   IX86_BUILTIN_VPCMOV_V4SI,
30539   IX86_BUILTIN_VPCMOV_V8HI,
30540   IX86_BUILTIN_VPCMOV_V16QI,
30541   IX86_BUILTIN_VPCMOV_V4SF,
30542   IX86_BUILTIN_VPCMOV_V2DF,
30543   IX86_BUILTIN_VPCMOV256,
30544   IX86_BUILTIN_VPCMOV_V4DI256,
30545   IX86_BUILTIN_VPCMOV_V8SI256,
30546   IX86_BUILTIN_VPCMOV_V16HI256,
30547   IX86_BUILTIN_VPCMOV_V32QI256,
30548   IX86_BUILTIN_VPCMOV_V8SF256,
30549   IX86_BUILTIN_VPCMOV_V4DF256,
30550 
30551   IX86_BUILTIN_VPPERM,
30552 
30553   IX86_BUILTIN_VPMACSSWW,
30554   IX86_BUILTIN_VPMACSWW,
30555   IX86_BUILTIN_VPMACSSWD,
30556   IX86_BUILTIN_VPMACSWD,
30557   IX86_BUILTIN_VPMACSSDD,
30558   IX86_BUILTIN_VPMACSDD,
30559   IX86_BUILTIN_VPMACSSDQL,
30560   IX86_BUILTIN_VPMACSSDQH,
30561   IX86_BUILTIN_VPMACSDQL,
30562   IX86_BUILTIN_VPMACSDQH,
30563   IX86_BUILTIN_VPMADCSSWD,
30564   IX86_BUILTIN_VPMADCSWD,
30565 
30566   IX86_BUILTIN_VPHADDBW,
30567   IX86_BUILTIN_VPHADDBD,
30568   IX86_BUILTIN_VPHADDBQ,
30569   IX86_BUILTIN_VPHADDWD,
30570   IX86_BUILTIN_VPHADDWQ,
30571   IX86_BUILTIN_VPHADDDQ,
30572   IX86_BUILTIN_VPHADDUBW,
30573   IX86_BUILTIN_VPHADDUBD,
30574   IX86_BUILTIN_VPHADDUBQ,
30575   IX86_BUILTIN_VPHADDUWD,
30576   IX86_BUILTIN_VPHADDUWQ,
30577   IX86_BUILTIN_VPHADDUDQ,
30578   IX86_BUILTIN_VPHSUBBW,
30579   IX86_BUILTIN_VPHSUBWD,
30580   IX86_BUILTIN_VPHSUBDQ,
30581 
30582   IX86_BUILTIN_VPROTB,
30583   IX86_BUILTIN_VPROTW,
30584   IX86_BUILTIN_VPROTD,
30585   IX86_BUILTIN_VPROTQ,
30586   IX86_BUILTIN_VPROTB_IMM,
30587   IX86_BUILTIN_VPROTW_IMM,
30588   IX86_BUILTIN_VPROTD_IMM,
30589   IX86_BUILTIN_VPROTQ_IMM,
30590 
30591   IX86_BUILTIN_VPSHLB,
30592   IX86_BUILTIN_VPSHLW,
30593   IX86_BUILTIN_VPSHLD,
30594   IX86_BUILTIN_VPSHLQ,
30595   IX86_BUILTIN_VPSHAB,
30596   IX86_BUILTIN_VPSHAW,
30597   IX86_BUILTIN_VPSHAD,
30598   IX86_BUILTIN_VPSHAQ,
30599 
30600   IX86_BUILTIN_VFRCZSS,
30601   IX86_BUILTIN_VFRCZSD,
30602   IX86_BUILTIN_VFRCZPS,
30603   IX86_BUILTIN_VFRCZPD,
30604   IX86_BUILTIN_VFRCZPS256,
30605   IX86_BUILTIN_VFRCZPD256,
30606 
30607   IX86_BUILTIN_VPCOMEQUB,
30608   IX86_BUILTIN_VPCOMNEUB,
30609   IX86_BUILTIN_VPCOMLTUB,
30610   IX86_BUILTIN_VPCOMLEUB,
30611   IX86_BUILTIN_VPCOMGTUB,
30612   IX86_BUILTIN_VPCOMGEUB,
30613   IX86_BUILTIN_VPCOMFALSEUB,
30614   IX86_BUILTIN_VPCOMTRUEUB,
30615 
30616   IX86_BUILTIN_VPCOMEQUW,
30617   IX86_BUILTIN_VPCOMNEUW,
30618   IX86_BUILTIN_VPCOMLTUW,
30619   IX86_BUILTIN_VPCOMLEUW,
30620   IX86_BUILTIN_VPCOMGTUW,
30621   IX86_BUILTIN_VPCOMGEUW,
30622   IX86_BUILTIN_VPCOMFALSEUW,
30623   IX86_BUILTIN_VPCOMTRUEUW,
30624 
30625   IX86_BUILTIN_VPCOMEQUD,
30626   IX86_BUILTIN_VPCOMNEUD,
30627   IX86_BUILTIN_VPCOMLTUD,
30628   IX86_BUILTIN_VPCOMLEUD,
30629   IX86_BUILTIN_VPCOMGTUD,
30630   IX86_BUILTIN_VPCOMGEUD,
30631   IX86_BUILTIN_VPCOMFALSEUD,
30632   IX86_BUILTIN_VPCOMTRUEUD,
30633 
30634   IX86_BUILTIN_VPCOMEQUQ,
30635   IX86_BUILTIN_VPCOMNEUQ,
30636   IX86_BUILTIN_VPCOMLTUQ,
30637   IX86_BUILTIN_VPCOMLEUQ,
30638   IX86_BUILTIN_VPCOMGTUQ,
30639   IX86_BUILTIN_VPCOMGEUQ,
30640   IX86_BUILTIN_VPCOMFALSEUQ,
30641   IX86_BUILTIN_VPCOMTRUEUQ,
30642 
30643   IX86_BUILTIN_VPCOMEQB,
30644   IX86_BUILTIN_VPCOMNEB,
30645   IX86_BUILTIN_VPCOMLTB,
30646   IX86_BUILTIN_VPCOMLEB,
30647   IX86_BUILTIN_VPCOMGTB,
30648   IX86_BUILTIN_VPCOMGEB,
30649   IX86_BUILTIN_VPCOMFALSEB,
30650   IX86_BUILTIN_VPCOMTRUEB,
30651 
30652   IX86_BUILTIN_VPCOMEQW,
30653   IX86_BUILTIN_VPCOMNEW,
30654   IX86_BUILTIN_VPCOMLTW,
30655   IX86_BUILTIN_VPCOMLEW,
30656   IX86_BUILTIN_VPCOMGTW,
30657   IX86_BUILTIN_VPCOMGEW,
30658   IX86_BUILTIN_VPCOMFALSEW,
30659   IX86_BUILTIN_VPCOMTRUEW,
30660 
30661   IX86_BUILTIN_VPCOMEQD,
30662   IX86_BUILTIN_VPCOMNED,
30663   IX86_BUILTIN_VPCOMLTD,
30664   IX86_BUILTIN_VPCOMLED,
30665   IX86_BUILTIN_VPCOMGTD,
30666   IX86_BUILTIN_VPCOMGED,
30667   IX86_BUILTIN_VPCOMFALSED,
30668   IX86_BUILTIN_VPCOMTRUED,
30669 
30670   IX86_BUILTIN_VPCOMEQQ,
30671   IX86_BUILTIN_VPCOMNEQ,
30672   IX86_BUILTIN_VPCOMLTQ,
30673   IX86_BUILTIN_VPCOMLEQ,
30674   IX86_BUILTIN_VPCOMGTQ,
30675   IX86_BUILTIN_VPCOMGEQ,
30676   IX86_BUILTIN_VPCOMFALSEQ,
30677   IX86_BUILTIN_VPCOMTRUEQ,
30678 
30679   /* LWP instructions.  */
30680   IX86_BUILTIN_LLWPCB,
30681   IX86_BUILTIN_SLWPCB,
30682   IX86_BUILTIN_LWPVAL32,
30683   IX86_BUILTIN_LWPVAL64,
30684   IX86_BUILTIN_LWPINS32,
30685   IX86_BUILTIN_LWPINS64,
30686 
30687   IX86_BUILTIN_CLZS,
30688 
30689   /* RTM */
30690   IX86_BUILTIN_XBEGIN,
30691   IX86_BUILTIN_XEND,
30692   IX86_BUILTIN_XABORT,
30693   IX86_BUILTIN_XTEST,
30694 
30695   /* MPX */
30696   IX86_BUILTIN_BNDMK,
30697   IX86_BUILTIN_BNDSTX,
30698   IX86_BUILTIN_BNDLDX,
30699   IX86_BUILTIN_BNDCL,
30700   IX86_BUILTIN_BNDCU,
30701   IX86_BUILTIN_BNDRET,
30702   IX86_BUILTIN_BNDNARROW,
30703   IX86_BUILTIN_BNDINT,
30704   IX86_BUILTIN_SIZEOF,
30705   IX86_BUILTIN_BNDLOWER,
30706   IX86_BUILTIN_BNDUPPER,
30707 
30708   /* BMI instructions.  */
30709   IX86_BUILTIN_BEXTR32,
30710   IX86_BUILTIN_BEXTR64,
30711   IX86_BUILTIN_CTZS,
30712 
30713   /* TBM instructions.  */
30714   IX86_BUILTIN_BEXTRI32,
30715   IX86_BUILTIN_BEXTRI64,
30716 
30717   /* BMI2 instructions.  */
30718   IX86_BUILTIN_BZHI32,
30719   IX86_BUILTIN_BZHI64,
30720   IX86_BUILTIN_PDEP32,
30721   IX86_BUILTIN_PDEP64,
30722   IX86_BUILTIN_PEXT32,
30723   IX86_BUILTIN_PEXT64,
30724 
30725   /* ADX instructions.  */
30726   IX86_BUILTIN_ADDCARRYX32,
30727   IX86_BUILTIN_ADDCARRYX64,
30728 
30729   /* SBB instructions.  */
30730   IX86_BUILTIN_SBB32,
30731   IX86_BUILTIN_SBB64,
30732 
30733   /* FSGSBASE instructions.  */
30734   IX86_BUILTIN_RDFSBASE32,
30735   IX86_BUILTIN_RDFSBASE64,
30736   IX86_BUILTIN_RDGSBASE32,
30737   IX86_BUILTIN_RDGSBASE64,
30738   IX86_BUILTIN_WRFSBASE32,
30739   IX86_BUILTIN_WRFSBASE64,
30740   IX86_BUILTIN_WRGSBASE32,
30741   IX86_BUILTIN_WRGSBASE64,
30742 
30743   /* RDRND instructions.  */
30744   IX86_BUILTIN_RDRAND16_STEP,
30745   IX86_BUILTIN_RDRAND32_STEP,
30746   IX86_BUILTIN_RDRAND64_STEP,
30747 
30748   /* RDSEED instructions.  */
30749   IX86_BUILTIN_RDSEED16_STEP,
30750   IX86_BUILTIN_RDSEED32_STEP,
30751   IX86_BUILTIN_RDSEED64_STEP,
30752 
30753   /* F16C instructions.  */
30754   IX86_BUILTIN_CVTPH2PS,
30755   IX86_BUILTIN_CVTPH2PS256,
30756   IX86_BUILTIN_CVTPS2PH,
30757   IX86_BUILTIN_CVTPS2PH256,
30758 
30759   /* MONITORX and MWAITX instructions.  */
30760   IX86_BUILTIN_MONITORX,
30761   IX86_BUILTIN_MWAITX,
30762 
30763   /* CFString built-in for Darwin.  */
30764   IX86_BUILTIN_CFSTRING,
30765 
30766   /* Builtins to get CPU type and supported features. */
30767   IX86_BUILTIN_CPU_INIT,
30768   IX86_BUILTIN_CPU_IS,
30769   IX86_BUILTIN_CPU_SUPPORTS,
30770 
30771   /* Read/write FLAGS register built-ins.  */
30772   IX86_BUILTIN_READ_FLAGS,
30773   IX86_BUILTIN_WRITE_FLAGS,
30774 
30775   IX86_BUILTIN_MAX
30776 };
30777 
30778 /* Table for the ix86 builtin decls.  */
30779 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30780 
30781 /* Table of all the builtin functions that are possible with different ISAs
30782    but are waiting to be built until a function is declared to use that
30783    ISA.  */
30784 struct builtin_isa {
30785   const char *name;		/* function name */
30786   enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30787   HOST_WIDE_INT isa;		/* isa_flags this builtin is defined for */
30788   bool const_p;			/* true if the declaration is constant */
30789   bool leaf_p;			/* true if the declaration has leaf attribute */
30790   bool nothrow_p;		/* true if the declaration has nothrow attribute */
30791   bool set_and_not_built_p;	/* true if deferred: recorded but not yet built */
30792 };
30793 
30794 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30795 
30796 /* ISA bits that can still trigger the inclusion of a deferred builtin.  */
30797 static HOST_WIDE_INT deferred_isa_values = 0;
30798 
30799 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save MASK,
30800    the isa_flags the builtin requires, in the ix86_builtins_isa array.  Stores
30801    the function decl in the ix86_builtins array.  Returns the function decl,
30802    or NULL_TREE if the builtin was not added.
30803 
30804    If the front end has a special hook for builtin functions, delay adding
30805    builtin functions that aren't in the current ISA until the ISA is changed
30806    with function specific optimization.  Doing so can save about 300K for the
30807    default compiler.  When the builtin is expanded, check at that time whether
30808    it is valid.
30809 
30810    If the front end doesn't have a special hook, record all builtins, even
30811    those whose instruction set isn't in the current ISA, in case the user
30812    uses function specific options for a different ISA; that way we don't get
30813    scope errors if a builtin is added in the middle of a function scope.  */
30814 
30815 static inline tree
30816 def_builtin (HOST_WIDE_INT mask, const char *name,
30817 	     enum ix86_builtin_func_type tcode,
30818 	     enum ix86_builtins code)
30819 {
30820   tree decl = NULL_TREE;
30821 
30822   if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30823     {
30824       ix86_builtins_isa[(int) code].isa = mask;
30825 
30826       /* OPTION_MASK_ISA_AVX512VL has a special meaning.  Unlike the generic
30827 	 case, where any set bit means the built-in is enabled, this bit must
30828 	 be *and-ed* with another one.  E.g. OPTION_MASK_ISA_AVX512DQ
30829 	 | OPTION_MASK_ISA_AVX512VL means that *both* cpuid bits must be set
30830 	 for the built-in to be available.  Handle this here.  */
30831       if (mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
30832 	  mask &= ~OPTION_MASK_ISA_AVX512VL;
30833 
30834       mask &= ~OPTION_MASK_ISA_64BIT;
30835       if (mask == 0
30836 	  || (mask & ix86_isa_flags) != 0
30837 	  || (lang_hooks.builtin_function
30838 	      == lang_hooks.builtin_function_ext_scope))
30839 
30840 	{
30841 	  tree type = ix86_get_builtin_func_type (tcode);
30842 	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30843 				       NULL, NULL_TREE);
30844 	  ix86_builtins[(int) code] = decl;
30845 	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30846 	}
30847       else
30848 	{
30849 	  /* Only a MASK recorded with set_and_not_built_p == true can
30850 	     later cause the builtin to be included.  */
30851 	  deferred_isa_values |= mask;
30852 	  ix86_builtins[(int) code] = NULL_TREE;
30853 	  ix86_builtins_isa[(int) code].tcode = tcode;
30854 	  ix86_builtins_isa[(int) code].name = name;
30855 	  ix86_builtins_isa[(int) code].leaf_p = false;
30856 	  ix86_builtins_isa[(int) code].nothrow_p = false;
30857 	  ix86_builtins_isa[(int) code].const_p = false;
30858 	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30859 	}
30860     }
30861 
30862   return decl;
30863 }
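
/* For illustration, a hedged sketch of a registration; the builtin name and
   the enumerator IX86_BUILTIN_EXAMPLE are hypothetical and do not exist in
   this file:

     def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_example",
		  V4DI_FTYPE_V4DI_V4DI, IX86_BUILTIN_EXAMPLE);

   If AVX2 is already in ix86_isa_flags (or MASK is zero), the decl is built
   on the spot.  Otherwise, for a front end whose builtin_function hook
   differs from builtin_function_ext_scope, only the name, type and mask are
   recorded in ix86_builtins_isa with set_and_not_built_p set, and NULL_TREE
   is returned until ix86_add_new_builtins builds the decl.  */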
30864 
30865 /* Like def_builtin, but also marks the function decl "const".  */
30866 
30867 static inline tree
30868 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30869 		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30870 {
30871   tree decl = def_builtin (mask, name, tcode, code);
30872   if (decl)
30873     TREE_READONLY (decl) = 1;
30874   else
30875     ix86_builtins_isa[(int) code].const_p = true;
30876 
30877   return decl;
30878 }
30879 
30880 /* Add any new builtin functions for a given ISA that may not have been
30881    declared.  This saves a bit of space compared to adding all of the
30882    declarations to the tree up front, whether or not they are used.  */
30883 
30884 static void
30885 ix86_add_new_builtins (HOST_WIDE_INT isa)
30886 {
30887   if ((isa & deferred_isa_values) == 0)
30888     return;
30889 
30890   /* The bits in ISA are handled now; remove them from the deferred isa values.  */
30891   deferred_isa_values &= ~isa;
30892 
30893   int i;
30894   tree saved_current_target_pragma = current_target_pragma;
30895   current_target_pragma = NULL_TREE;
30896 
30897   for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30898     {
30899       if ((ix86_builtins_isa[i].isa & isa) != 0
30900 	  && ix86_builtins_isa[i].set_and_not_built_p)
30901 	{
30902 	  tree decl, type;
30903 
30904 	  /* Don't define the builtin again.  */
30905 	  ix86_builtins_isa[i].set_and_not_built_p = false;
30906 
30907 	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30908 	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30909 						 type, i, BUILT_IN_MD, NULL,
30910 						 NULL_TREE);
30911 
30912 	  ix86_builtins[i] = decl;
30913 	  if (ix86_builtins_isa[i].const_p)
30914 	    TREE_READONLY (decl) = 1;
30915 	  if (ix86_builtins_isa[i].leaf_p)
30916 	    DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30917 						      NULL_TREE);
30918 	  if (ix86_builtins_isa[i].nothrow_p)
30919 	    TREE_NOTHROW (decl) = 1;
30920 	}
30921     }
30922 
30923   current_target_pragma = saved_current_target_pragma;
30924 }
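
/* A hedged usage sketch (user source, not part of this file): with a
   default ISA of, say, -msse2, AVX builtins are deferred at start-up;
   a later definition such as

     __attribute__ ((target ("avx")))
     void f (void)
     {
       __builtin_ia32_vzeroupper ();
     }

   switches the ISA for f, and processing that target attribute is what
   reaches ix86_add_new_builtins with the new isa flags, creating the
   deferred decls at ext_scope so no scope errors occur mid-function.  */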
30925 
30926 /* Bits for builtin_description.flag.  */
30927 
30928 /* Set when we don't support the comparison natively, and should
30929    swap the comparison operands in order to support it.  */
30930 #define BUILTIN_DESC_SWAP_OPERANDS	1
30931 
30932 struct builtin_description
30933 {
30934   const HOST_WIDE_INT mask;
30935   const enum insn_code icode;
30936   const char *const name;
30937   const enum ix86_builtins code;
30938   const enum rtx_code comparison;
30939   const int flag;
30940 };
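
/* To illustrate how the tables below are read, take the first entry of
   bdesc_comi:

     { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq",
       IX86_BUILTIN_COMIEQSS, UNEQ, 0 }

   It makes __builtin_ia32_comieq available whenever SSE is enabled and
   expands it through the sse_comi insn pattern with an UNEQ comparison;
   the final 0 means no BUILTIN_DESC_* flag bits are set.  */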
30941 
30942 static const struct builtin_description bdesc_comi[] =
30943 {
30944   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30945   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30946   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30947   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30948   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30949   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30950   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30951   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30952   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30953   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30954   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30955   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30956   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30957   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30958   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30959   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30960   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30961   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30962   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30963   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30964   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30965   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30966   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30967   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
30968 };
30969 
30970 static const struct builtin_description bdesc_pcmpestr[] =
30971 {
30972   /* SSE4.2 */
30973   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30974   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30975   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30976   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30977   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30978   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30979   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30980 };
30981 
30982 static const struct builtin_description bdesc_pcmpistr[] =
30983 {
30984   /* SSE4.2 */
30985   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30986   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30987   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30988   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30989   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30990   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30991   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30992 };
30993 
30994 /* Special builtins with a variable number of arguments.  */
30995 static const struct builtin_description bdesc_special_args[] =
30996 {
30997   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30998   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30999   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
31000 
31001   /* 80387 (used internally for atomic compound assignment).  */
31002   { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
31003   { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
31004   { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
31005   { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
31006 
31007   /* MMX */
31008   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
31009 
31010   /* 3DNow! */
31011   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
31012 
31013   /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES.  */
31014   { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
31015   { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
31016   { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31017   { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31018   { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31019   { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31020   { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31021   { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31022 
31023   { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
31024   { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
31025   { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31026   { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31027   { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31028   { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31029   { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31030   { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31031 
31032   /* SSE */
31033   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
31034   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
31035   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
31036 
31037   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
31038   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
31039   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
31040   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
31041 
31042   /* SSE or 3DNow!A  */
31043   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
31044   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
31045 
31046   /* SSE2 */
31047   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
31048   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
31049   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
31050   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
31051   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
31052   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
31053   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
31054   { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
31055   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
31056   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
31057 
31058   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
31059   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
31060 
31061   /* SSE3 */
31062   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
31063 
31064   /* SSE4.1 */
31065   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
31066 
31067   /* SSE4A */
31068   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
31069   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
31070 
31071   /* AVX */
31072   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
31073   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
31074 
31075   { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
31076   { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
31077   { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
31078   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
31079   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
31080 
31081   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
31082   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
31083   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
31084   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
31085   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
31086   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
31087   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
31088 
31089   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
31090   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
31091   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
31092 
31093   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
31094   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
31095   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
31096   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
31097   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
31098   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
31099   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
31100   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
31101 
31102   /* AVX2 */
31103   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
31104   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
31105   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
31106   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
31107   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
31108   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
31109   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
31110   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
31111   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
31112 
31113   /* AVX512F */
31114   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
31115   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
31116   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
31117   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
31118   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
31119   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
31120   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
31121   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
31122   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
31123   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
31124   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
31125   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
31126   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
31127   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
31128   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
31129   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
31130   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
31131   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
31132   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
31133   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
31134   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
31135   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
31136   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
31137   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
31138   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
31139   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
31140   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
31141   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
31142   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
31143   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
31144   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
31145   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
31146   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
31147   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
31148   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
31149   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
31150   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
31151   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
31152   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
31153   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
31154   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
31155   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
31156   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
31157   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
31158   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
31159   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
31160   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
31161 
31162   { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
31163   { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
31164   { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
31165   { OPTION_MASK_ISA_LWP | OPTION_MASK_ISA_64BIT, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
31166   { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
31167   { OPTION_MASK_ISA_LWP | OPTION_MASK_ISA_64BIT, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
31168 
31169   /* FSGSBASE */
31170   { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31171   { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
31172   { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31173   { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
31174   { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
31175   { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
31176   { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
31177   { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
31178 
31179   /* RTM */
31180   { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31181   { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
31182   { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
31183 
31184   /* AVX512BW */
31185   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
31186   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
31187   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
31188   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
31189 
31190   /* AVX512VL */
31191   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
31192   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
31193   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
31194   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
31195   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31196   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31197   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31198   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31199   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31200   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31201   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31202   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31203   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31204   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31205   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31206   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31207   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31208   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31209   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31210   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31211   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31212   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31213   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31214   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31215   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31216   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31217   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31218   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31219   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31220   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31221   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31222   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31223   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31224   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31225   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31226   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31227   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
31228   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
31229   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
31230   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
31231   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31232   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31233   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31234   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31235   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31236   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31237   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31238   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31239   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31240   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31241   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31242   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31243   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31244   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31245   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31246   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31247   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31248   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31249   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31250   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31251   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31252   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31253   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31254   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31255   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31256   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31257   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31258   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31259   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31260   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31261   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31262   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31263   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31264   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31265   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31266   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31267   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31268   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31269   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31270   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31271   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31272   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31273   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31274   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31275   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31276   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31277   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31278   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31279   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31280   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31281   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31282   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31283   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31284   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31285 
31286   /* PCOMMIT.  */
31287   { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
31288 };
31289 
31290 /* Builtins with a variable number of arguments.  */
31291 static const struct builtin_description bdesc_args[] =
31292 {
31293   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
31294   { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
31295   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
31296   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31297   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31298   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31299   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31300 
31301   /* MMX */
31302   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31303   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31304   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31305   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31306   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31307   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31308 
31309   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31310   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31311   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31312   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31313   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31314   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31315   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31316   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31317 
31318   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31319   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31320 
31321   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31322   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31323   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31324   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31325 
31326   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31327   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31328   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31329   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31330   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31331   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31332 
31333   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31334   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31335   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31336   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31337   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31338   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31339 
31340   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31341   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
31342   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31343 
31344   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
31345 
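  /* Shifts.  The _COUNT suffix marks the last operand as a shift count
     that ix86_expand_args_builtin adapts to the mode the insn expects,
     giving each shift an immediate (..._SI_COUNT) and a vector-count
     variant that share one pattern.  */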
31346   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31347   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31348   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31349   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31350   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31351   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31352 
31353   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31354   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31355   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31356   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31357   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31358   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31359 
31360   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31361   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31362   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31363   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31364 
31365   /* 3DNow! */
31366   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31367   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31368   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31369   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31370 
31371   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31372   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31373   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31374   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31375   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31376   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31377   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31378   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31379   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31380   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31381   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31382   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31383   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31384   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31385   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31386 
31387   /* 3DNow!A */
31388   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31389   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31390   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31391   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31392   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31393   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31394 
31395   /* SSE */
31396   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31397   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31398   { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31399   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31400   { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31401   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31402   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31403   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31404   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31405   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31406   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31407   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31408 
31409   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31410 
31411   { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31412   { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31413   { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31414   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31415   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31416   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31417   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31418   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31419 
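  /* Compares.  The _SWAP suffix makes the expander exchange the two
     inputs, so the GT/GE (and their negated) variants reuse the LT/LE
     and UNGE/UNGT mask-compare patterns.  */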
31420   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31421   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31422   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31423   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31424   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31425   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31426   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31427   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31428   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31429   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31430   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31431   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31432   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31433   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31434   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31435   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31436   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31437   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31438   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31439   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31440 
31441   { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31442   { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31443   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31444   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31445 
31446   { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31447   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31448   { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31449   { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31450 
31451   { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3,  "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31452 
31453   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31454   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31455   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31456   { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31457   { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31458 
31459   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31460   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31461   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
31462 
31463   { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31464 
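  /* The _VEC_MERGE prototypes expand a one-operand scalar insn by
     duplicating the input, so the vec_merge pattern sees both the value
     to operate on and the vector supplying the untouched elements.  */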
31465   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31466   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31467   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31468 
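  /* __float128 helpers; the null names mean the decls (__builtin_fabsq
     and __builtin_copysignq) are created by hand in ix86_init_builtins
     and only the expansion data lives here.  */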
31469   { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31470   { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
31471 
31472   /* SSE MMX or 3DNow!A */
31473   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31474   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31475   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31476 
31477   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31478   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31479   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31480   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31481 
31482   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31483   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31484 
31485   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31486 
31487   /* SSE2 */
31488   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31489 
31490   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF  },
31491   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31492   { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31493   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31494   { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31495 
31496   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31497   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31498   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31499   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31500   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31501 
31502   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31503 
31504   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31505   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31506   { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31507   { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31508 
31509   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31510   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31511   { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31512 
31513   { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31514   { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31515   { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31516   { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31517   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31518   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31519   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31520   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31521 
31522   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31523   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31524   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31525   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31526   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31527   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31528   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31529   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31530   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31531   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31532   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31533   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31534   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31535   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31536   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31537   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31538   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31539   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31540   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31541   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31542 
31543   { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31544   { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31545   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31546   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31547 
31548   { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31549   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31550   { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31551   { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31552 
31553   { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3,  "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31554 
31555   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31556   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31557   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31558 
31559   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31560 
31561   { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31562   { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31563   { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31564   { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31565   { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31566   { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31567   { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31568   { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31569 
31570   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31571   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31572   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31573   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31574   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31575   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31576   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31577   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31578 
31579   { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31580   { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31581 
31582   { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31583   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31584   { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31585   { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31586 
31587   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31588   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31589 
31590   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31591   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31592   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI  },
31593   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31594   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31595   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI  },
31596 
31597   { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31598   { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31599   { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31600   { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31601 
31602   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31603   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI  },
31604   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN,  (int) V4SI_FTYPE_V4SI_V4SI },
31605   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31606   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31607   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31608   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31609   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31610 
31611   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31612   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31613   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31614 
31615   { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31616   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31617 
31618   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31619   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31620 
31621   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31622 
31623   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31624   { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31625   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31626   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
31627 
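  /* The _INT_CONVERT prototypes are expanded in another mode (V1TImode
     here), so the whole-register byte shifts reinterpret their V2DI
     argument; the trailing INT is the required immediate.  */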
31628   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31629   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31630   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31631   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31632   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31633   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31634   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31635 
31636   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31637   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31638   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31639   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31640   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31641   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31642   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31643 
31644   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31645   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31646   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31647   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31648 
31649   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31650   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31651   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31652 
31653   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31654 
31655   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31656 
31657   /* SSE2 MMX */
31658   { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31659   { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31660 
31661   /* SSE3 */
31662   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31663   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31664 
31665   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31666   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31667   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31668   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31669   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31670   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31671 
31672   /* SSSE3 */
31673   { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31674   { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31675   { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31676   { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31677   { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31678   { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31679 
31680   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31681   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31682   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31683   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31684   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31685   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31686   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31687   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31688   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31689   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31690   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31691   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31692   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31693   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31694   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31695   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31696   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31697   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31698   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31699   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31700   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31701   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31702   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31703   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31704 
31705   /* SSSE3 palignr.  */
31706   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31707   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
31708 
31709   /* SSE4.1 */
31710   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31711   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31712   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31713   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31714   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31715   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31716   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31717   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31718   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31719   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31720 
31721   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31722   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31723   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31724   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31725   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31726   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31727   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31728   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31729   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31730   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31731   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31732   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31733   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31734 
31735   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31736   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31737   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31738   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31739   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31740   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31741   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31742   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31743   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31744   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31745   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31746   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31747 
31748   /* SSE4.1 round and ptest */
31749   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31750   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31751   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31752   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31753 
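  /* For these the comparison slot carries a ROUND_* rounding immediate
     (cast to rtx_code); ix86_expand_sse_round passes it through as the
     round insn's constant operand.  */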
31754   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31755   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31756   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31757   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31758 
31759   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31760   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31761 
31762   { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31763   { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31764 
31765   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31766   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31767   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31768   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31769 
31770   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31771   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31772 
31773   { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31774   { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31775 
31776   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31777   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31778   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
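        /* The three PTEST entries share one insn; the rtx code selects which
           flag the expander tests afterwards: EQ -> ZF (ptestz), LTU -> CF
           (ptestc), GTU -> neither flag set (ptestnzc).  Hence, as a sketch:

             int z = __builtin_ia32_ptestz128 (a, b);   // (a & b) == 0
             int c = __builtin_ia32_ptestc128 (a, b);   // (~a & b) == 0
        */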
31779 
31780   /* SSE4.2 */
31781   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31782   { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31783   { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31784   { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31785   { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
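        /* The CRC32 builtins accumulate a CRC-32C (Castagnoli) checksum:
           operand 1 is the running CRC, operand 2 the new data.  Illustrative
           use (this is what <smmintrin.h>'s _mm_crc32_u8 expands to):

             unsigned int crc = 0;
             crc = __builtin_ia32_crc32qi (crc, byte);  */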
31786 
31787   /* SSE4A */
31788   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31789   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31790   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31791   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31792 
31793   /* AES */
31794   { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31795   { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31796 
31797   { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31798   { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31799   { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31800   { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31801 
31802   /* PCLMUL */
31803   { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
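        /* The AES and PCLMUL entries deliberately carry a null name: the
           user-visible builtins (__builtin_ia32_aesenc128 and friends) are
           registered elsewhere in this file under their own ISA flags, so
           this table only supplies the insn codes and prototypes.  */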
31804 
31805   /* AVX */
31806   { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31807   { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31808   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31809   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31810   { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31811   { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31812   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31813   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31814   { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31815   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31816   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31817   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31818   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31819   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31820   { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31821   { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31822   { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31823   { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31824   { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31825   { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31826   { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31827   { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31828   { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31829   { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31830   { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31831   { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31832 
31833   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31834   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31835   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31836   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31837 
31838   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31839   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31840   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31841   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31842   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31843   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31844   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31845   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31846   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31847   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31848   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31849   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31850   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31851   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31852   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31853   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31854   { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31855   { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31856   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31857   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31858   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31859   { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31860   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31861   { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31862   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31863   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31864   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31865   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31866   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31867   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31868   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31869   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31870   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31871   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31872 
31873   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31874   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31875   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31876 
31877   { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31878   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31879   { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31880   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31881   { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31882 
31883   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31884 
31885   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31886   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31887 
31888   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31889   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31890   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31891   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31892 
31893   { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31894   { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31895 
31896   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31897   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31898 
31899   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31900   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31901   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31902   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31903 
31904   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31905   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31906 
31907   { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31908   { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31909 
31910   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256,  "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31911   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256,  "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31912   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256,  "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31913   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256,  "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31914 
31915   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31916   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31917   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31918   { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31919   { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31920   { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
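        /* Cast builtins: si256_si/ps256_ps/pd256_pd widen a 128-bit value
           into the low half of a 256-bit vector (upper half undefined), and
           the vec_extract_lo_* entries narrow back.  Illustrative mapping,
           as used by GCC's <avxintrin.h>:

             __m256 _mm256_castps128_ps256 (__m128 a)
               => (__m256) __builtin_ia32_ps256_ps ((__v4sf) a)  */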
31921 
31922   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31923   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31924   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31925   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31926   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31927   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31928   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31929   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31930   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31931   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31932   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31933   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31934   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31935   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31936   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31937 
31938   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF  },
31939   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31940 
31941   { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3,  "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31942   { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3,  "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31943 
31944   { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31945 
31946   /* AVX2 */
31947   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31948   { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31949   { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31950   { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31951   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256",  IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31952   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256",  IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31953   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256",  IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31954   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256",  IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31955   { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31956   { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31957   { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31958   { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31959   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31960   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31961   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31962   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31963   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31964   { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31965   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31966   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256",  IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31967   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256",  IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31968   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31969   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31970   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31971   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31972   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI  },
31973   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI  },
31974   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31975   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31976   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI  },
31977   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI  },
31978   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31979   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31980   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31981   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31982   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31983   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31984   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31985   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31986   { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31987   { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31988   { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31989   { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31990   { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31991   { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31992   { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31993   { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31994   { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31995   { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31996   { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31997   { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31998   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31999   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
32000   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2  , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
32001   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2  , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
32002   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2  , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
32003   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2  , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
32004   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2  , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
32005   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
32006   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2  , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
32007   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2  , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
32008   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2  , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
32009   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2  , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
32010   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2  , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
32011   { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
32012   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32013   { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32014   { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256"  , IX86_BUILTIN_PMULHW256  , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32015   { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256"  , IX86_BUILTIN_PMULLW256  , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32016   { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256"  , IX86_BUILTIN_PMULLD256  , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32017   { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
32018   { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32019   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
32020   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32021   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
32022   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
32023   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
32024   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32025   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32026   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32027   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
32028   { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
32029   { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
32030   { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
32031   { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
32032   { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
32033   { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
32034   { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
32035   { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
32036   { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
32037   { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
32038   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
32039   { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
32040   { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
32041   { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
32042   { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
32043   { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
32044   { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
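        /* Suffix conventions in the shift entries above: the *_COUNT
           prototypes let one insn pattern serve both the immediate-count form
           (e.g. psllwi256) and the XMM-register-count form (psllw256), while
           *_CONVERT marks the whole-register byte shifts (pslldqi/psrldqi),
           which really operate in V2TImode and have their operands converted
           by the expander.  */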
32045   { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32046   { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32047   { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32048   { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32049   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32050   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32051   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32052   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32053   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32054   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI  },
32055   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN,  (int) V8SI_FTYPE_V8SI_V8SI },
32056   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32057   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32058   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32059   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32060   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32061   { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32062   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32063   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
32064   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
32065   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
32066   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32067   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
32068   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
32069   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
32070   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
32071   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
32072   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
32073   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
32074   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
32075   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
32076   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32077   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
32078   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
32079   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
32080   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
32081   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
32082   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
32083   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32084   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32085   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32086   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32087   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32088   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32089   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32090   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32091   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32092   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32093 
32094   { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt,   "__builtin_clzs",   IX86_BUILTIN_CLZS,    UNKNOWN,     (int) UINT16_FTYPE_UINT16 },
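        /* 16-bit leading-zero count.  This expands directly to the LZCNT
           instruction, which itself returns the operand width (16) for a
           zero input.  */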
32095 
32096   /* BMI */
32097   { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32098   { OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32099   { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2,       "__builtin_ctzs",           IX86_BUILTIN_CTZS,    UNKNOWN, (int) UINT16_FTYPE_UINT16 },
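        /* BEXTR extracts a bit field: bits 7:0 of the control operand give
           the start position, bits 15:8 the length.  A sketch:

             // extract 8 bits starting at bit 4, i.e. (x >> 4) & 0xff
             unsigned r = __builtin_ia32_bextr_u32 (x, (8 << 8) | 4);

           __builtin_ctzs is the 16-bit trailing-zero count (TZCNT).  */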
32100 
32101   /* TBM */
32102   { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32103   { OPTION_MASK_ISA_TBM | OPTION_MASK_ISA_64BIT, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
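        /* TBM's BEXTRI is the immediate form of BEXTR: the start/length
           control word is encoded in the instruction rather than taken from
           a register.  */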
32104 
32105   /* F16C */
32106   { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
32107   { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
32108   { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
32109   { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
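        /* F16C: vcvtps2ph takes a rounding-control immediate, like ROUNDPS.
           Roughly what GCC's <f16cintrin.h> does with it:

             #define _mm_cvtps_ph(A, I) \
               ((__m128i) __builtin_ia32_vcvtps2ph ((__v4sf)(__m128)(A), (int)(I)))  */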
32110 
32111   /* BMI2 */
32112   { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32113   { OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32114   { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32115   { OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32116   { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32117   { OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
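        /* PDEP scatters the low bits of the source into the set positions of
           the mask; PEXT is the inverse gather.  Worked example:

             __builtin_ia32_pdep_si (0x0a, 0xf0) == 0xa0   // 1010 -> bits 7:4
             __builtin_ia32_pext_si (0xca, 0x0f) == 0x0a   // gather low nibble
        */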
32118 
32119   /* AVX512F */
32120   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
32121   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
32122   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
32123   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
32124   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
32125   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
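        /* AVX-512 conventions used from here on: a '_mask' builtin takes two
           trailing operands beyond the operation's own, the pass-through
           vector (kept in lanes whose mask bit is clear) and the mask itself
           (QI for 8 lanes, HI for 16); '_maskz' variants zero those lanes
           instead.  As a sketch:

             // d[i] = (m >> i) & 1 ? abs (s[i]) : w[i]
             d = __builtin_ia32_pabsd512_mask (s, w, m);
        */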
32126   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
32127   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
32128   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32129   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32130   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32131   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32132   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32133   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
32134   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32135   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
32136   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32137   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32138   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
32139   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
32140   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32141   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32142   { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
32143   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask,  "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
32144   { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
32145   { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
32146   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32147   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32148   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32149   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32150   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
32151   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
32152   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
32153   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
32154   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
32155   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
32156   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
32157   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
32158   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32159   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32160   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32161   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32162   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32163   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32164   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32165   { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32166   { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32167   { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32168   { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32169   { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32170   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32171   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32172   { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32173   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32174   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
32175   { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
32176   { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
32177   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32178   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
32179   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32180   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32181   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32182   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32183   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32184   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32185   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32186   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32187   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32188   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32189   { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32190   { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32191   { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32192   { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32193   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32194   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32195   { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32196   { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32197   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32198   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32199   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32200   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32201   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32202   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32203   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32204   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32205   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32206   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32207   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
32208   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
32209   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
32210   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
32211   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
32212   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32213   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32214   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32215   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32216   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32217   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
32218   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
32219   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
32220   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
32221   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
32222   { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
32223   { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32224   { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
32225   { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32226   { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32227   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32228   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32229   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32230   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32231   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32232   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32233   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32234   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32235   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32236   { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32237   { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32238   { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32239   { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32240   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32241   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32242   { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32243   { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32244   { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32245   { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32246   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32247   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32248   { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32249   { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32250   { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32251   { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32252   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32253   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32254   { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32255   { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32256   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32257   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32258   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32259   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32260   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32261   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32262   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32263   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32264   { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32265   { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32266   { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32267   { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32268   { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32269   { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32270   { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32271   { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32272   { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32273   { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32274   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32275   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32276   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32277   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32278   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
32279   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
32280   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
32281   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
32282   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32283   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32284   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32285   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32286   { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32287   { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32288   { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32289   { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32290   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32291   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32292   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32293   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32294   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32295   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32296   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32297   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32298   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32299   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32300   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32301   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32302   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32303   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32304   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32305   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32306   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32307   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32308   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32309   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32310   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32311   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32312   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32313   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32314   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32315   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32316 
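  /* The next few entries are unmasked AVX-512 ops: their *_FTYPE_* codes
     carry no pass-through operand or mask.  Note how the floor/ceil
     vec_pack_sfix entries reuse a single icode and encode the rounding
     mode in the comparison slot as (enum rtx_code) ROUND_FLOOR /
     ROUND_CEIL.  */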
32317   { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
32318   { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
32319   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
32320   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32321   { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32322   { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
32323   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32324   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32325 
32326   /* Mask arithmetic operations.  */
32327   { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32328   { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32329   { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
32330   { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32331   { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32332   { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32333   { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
32334   { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32335   { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32336   { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
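  /* Illustrative use of the mask ops above (a sketch; the wrapper name
     assumes the avx512fintrin.h mapping, e.g. _mm512_kand):

       __mmask16 m = (__mmask16) __builtin_ia32_kandhi ((__mmask16) a,
							(__mmask16) b);

     HI_FTYPE_HI_HI reflects that 16-bit mask values travel in HImode.  */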
32337 
32338   /* SHA.  */
32339   { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32340   { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32341   { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32342   { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32343   { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32344   { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32345   { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
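  /* The SHA entries leave the name field 0: the builtins themselves are
     registered separately in this file (via def_builtin_const, it
     appears), so these rows only supply the icode used at expansion
     time.  */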
32346 
32347   /* AVX512VL.  */
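  /* Convention for the masked entries: the source operands come first,
     then the pass-through vector for masked-off lanes, then the mask,
     whose mode matches the lane count (QI for 8 lanes, HI for 16, SI
     for 32).  Illustratively, for V4SI_FTYPE_V4SI_V4SI_V4SI_QI below:

       dst = __builtin_ia32_paddd128_mask (a, b, passthru, m);  */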
32348   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
32349   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
32350   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32351   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32352   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32353   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32354   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32355   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32356   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32357   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32358   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32359   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32360   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32361   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32362   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32363   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32364   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32365   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32366   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32367   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32368   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32369   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32370   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32371   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32372   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32373   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32374   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32375   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32376   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32377   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32378   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32379   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32380   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32381   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32382   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32383   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32384   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32385   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32386   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32387   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32388   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32389   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32390   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32391   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32392   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32393   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32394   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32395   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32396   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32397   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32398   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32399   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32400   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32401   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
32402   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
32403   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
32404   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
32405   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32406   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32407   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32408   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32409   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32410   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32411   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32412   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32413   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32414   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32415   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32416   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32417   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32418   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32419   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32420   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32421   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32422   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32423   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32424   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32425   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32426   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32427   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32428   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32429   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32430   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32431   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32432   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32433   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32434   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32435   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32436   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32437   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
32438   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
32439   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
32440   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
32441   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32442   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
32443   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
32444   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32445   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32446   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32447   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32448   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32449   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32450   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32451   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32452   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32453   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32454   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32455   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32456   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32457   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32458   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32459   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32460   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32461   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
32462   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32463   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32464   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32465   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32466   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32467   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32468   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32469   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32470   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32471   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32472   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32473   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32474   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32475   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32476   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32477   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32478   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32479   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32480   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32481   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32482   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32483   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32484   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32485   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32486   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32487   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32488   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32489   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32490   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32491   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32492   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32493   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32494   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32495   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32496   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32497   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32498   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32499   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
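  /* RCP14/RSQRT14 approximations (relative error bounded by 2^-14) and full-precision square roots.  */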
32500   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32501   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32502   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32503   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32504   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32505   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32506   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32507   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32508   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32509   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32510   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32511   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
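  /* Masked byte/word/dword/qword add and subtract, including signed (s) and unsigned (us) saturating forms.  */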
32512   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32513   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32514   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32515   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32516   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32517   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32518   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32519   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32520   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32521   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32522   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32523   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32524   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32525   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32526   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32527   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32528   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32529   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32530   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32531   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32532   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32533   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32534   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32535   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32536   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32537   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32538   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32539   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32540   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32541   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32542   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32543   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
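  /* Shuffles that select 128-bit blocks from two sources.  */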
32544   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32545   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32546   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32547   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
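  /* VPMOV down-conversions: plain truncation plus signed (s) and unsigned (us) saturating narrowing.  */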
32548   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32549   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32550   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32551   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32552   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32553   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32554   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32555   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32556   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32557   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32558   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32559   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32560   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32561   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32562   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32563   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32564   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32565   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32566   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32567   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32568   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32569   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32570   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32571   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32572   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32573   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32574   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32575   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32576   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32577   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32578   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32579   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32580   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32581   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32582   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32583   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
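  /* VRANGE (imm8-controlled min/max selection), VGETEXP (exponent extraction) and VFIXUPIMM (special-value fixup) builtins.  */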
32584   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32585   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32586   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32587   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32588   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32589   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32590   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32591   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32592   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32593   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32594   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32595   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32596   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32597   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32598   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32599   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32600   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32601   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32602   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32603   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32604   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32605   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32606   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32607   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32608   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32609   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32610   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32611   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32611   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32612   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32613   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
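  /* Packed floating-point bitwise AND/ANDNOT (AVX512DQ).  */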
32614   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32615   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32616   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32617   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32618   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32619   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32620   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32621   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
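  /* Shifts: the psllwi/pslldi/psllqi and psrai/psrli immediate forms take an imm8 count; the rest take the count from the low quadword of a vector operand.  */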
32622   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32623   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32624   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32625   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32626   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32627   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32628   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32629   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32630   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32631   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32632   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32633   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32634   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32635   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32636   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32637   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32638   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32639   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32640   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32641   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32642   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32643   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32644   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32645   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32646   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32647   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32648   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32649   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32650   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32651   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32652   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32653   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32654   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32655   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32656   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32657   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32658   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32659   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32660   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32661   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32662   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32663   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32664   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32665   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32666   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32667   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32668   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32669   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32670   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32671   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32672   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32673   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
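  /* VPTERNLOG: bitwise ternary logic with the truth table given in imm8.  */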
32674   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32675   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32676   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32677   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32678   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32679   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32680   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32681   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32682   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32683   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32684   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32685   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
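  /* Masked FMA variants: _mask merges masked-off lanes from the first operand, _mask3 from the third, and _maskz zeroes them.  */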
32686   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32687   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32688   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32689   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32690   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32691   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32692   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32693   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32694   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32695   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32696   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32697   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32698   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32699   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32700   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32701   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32702   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32703   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32704   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32705   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32706   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32707   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32708   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32709   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32710   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32711   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32712   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32713   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32714   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32715   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32716   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32717   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32718   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32719   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32720   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32721   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32722   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32723   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32724   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32725   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32726   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32727   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32728   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32729   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32730   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32731   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32732   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32733   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32734   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32735   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32736   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32737   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32738   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32739   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
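  /* Float-to-integer conversions that round according to MXCSR rather than truncating.  */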
32740   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32741   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32742   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32743   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32744   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32745   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32746   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32747   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32748   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32749   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32750   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32751   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32752   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32753   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32754   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32755   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32756   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32757   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32758   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32759   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32760   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32761   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32762   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32763   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32764   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32765   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
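  /* Two-table variable permutes for dword/qword and FP elements: vpermt2 forms overwrite the first table operand, vpermi2 forms the index operand.  */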
32766   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32767   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32768   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32769   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32770   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32771   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32772   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32773   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32774   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32775   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32776   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32777   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32778   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32779   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32780   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32781   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32782   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32783   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32784   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32785   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32786   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32787   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32788   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32789   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32790   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32791   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32792   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32793   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32794   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32795   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32796   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32797   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32798   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32799   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32800   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32801   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32802   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32803   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32804   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32805   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32806   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32807   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32808   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32809   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32810   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32811   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32812   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32813   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32814   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32815   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32816   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32817   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32818   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32819   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32820   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32821   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32822   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32823   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32824   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32825   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32826   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32827   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32828   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32829   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32830   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32831   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32832   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32833   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32834   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32835   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32836   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32837   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32838   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32839   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32840   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32841   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32842   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32843   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32844   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32845   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32846   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32847   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32848   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32849   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32850   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32851   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32852   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32853   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32854   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32855   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32856   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32857   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32858   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32859   { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32860   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32861   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32862   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32863   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32864   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32865   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32866   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32867   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32868   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32869   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32870   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32871   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32872   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32873   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32874   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32875   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32876   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32877   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32878   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32879   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32880   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32881   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32882   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32883   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32884   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32885   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32886   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32887   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32888   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32889   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32890   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32891   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32892   { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32893   { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32894   { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32895   { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32896   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32897   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32898   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32899   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32900   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32901   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32902   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32903   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32904   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32905   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32906   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32907   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32908   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32909   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32910   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32911   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32912   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32913   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32914   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32915   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32916   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32917   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32918   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32919   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32920   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32921   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32922   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32923   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32924   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32925   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32926   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32927   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32928   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32929   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32930   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32931   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32932   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32933   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32934   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32935   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32936   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32937   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32938   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32939   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32940   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32941   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32942   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32943   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32944   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32945   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32946   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32947   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32948   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32949   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32950   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32951   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32952   { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32953   { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32954   { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32955   { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32956   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32957   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32958   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32959   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32960   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32961   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32962   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask,  "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32963   { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32964   { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32965   { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32966   { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32967   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask,  "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32968   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32969   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32970   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32971   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32972   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask,  "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32973   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask,  "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32974   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32975   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32976   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32977   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32978   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32979   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32980   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32981   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32982   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32983   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32984   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32985   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32986   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32987   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32988   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32989   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32990   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32991   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32992   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32993   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32994   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask",  IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32995   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask",  IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32996   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask",  IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32997   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask",  IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32998   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32999   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
33000   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33001   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
33002   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
33003   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
33004   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
33005   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
33006   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
33007   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
33008   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
33009   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
33010   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
33011   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
33012   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
33013   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
33014   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
33015   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
33016   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
33017   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
33018   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
33019   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
33020   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
33021   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
33022   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
33023   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
33024   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
33025   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
33026   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
33027   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
33028   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
33029   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
33030   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
33031   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
33032   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
33033   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
33034   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
33035   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
33036   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
33037   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
33038   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33039   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
33040   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
33041   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
33042   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
33043   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
33044   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
33045   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
33046   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
33047   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
33048   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
33049   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
33050   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
33051   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
33052   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
33053   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
33054   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
33055   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
33056   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
33057   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
33058   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
33059   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
33060 
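  /* Each entry in this table pairs the ISA option mask that must be
     enabled with the insn code used to expand the builtin, the
     user-visible __builtin_ia32_* name, its IX86_BUILTIN_* enumerator,
     an rtx comparison code (UNKNOWN where the pattern needs none), and
     the ix86_builtin_func_type describing the signature.  Builtins
     suffixed _mask take a pass-through vector and a QI/HI/SI/DI
     write-mask as their final arguments: destination elements whose
     mask bit is clear are copied from the pass-through operand, while
     the _maskz variants zero those elements instead.  */
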
33061   /* AVX512DQ.  */
33062   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
33063   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
33064   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
33065   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
33066   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
33067   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
33068   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
33069   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
33070   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
33071   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
33072   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
33073   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
33074   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33075   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
33076   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
33077   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
33078   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
33079   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
33080   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
33081   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI},
33082   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
33083   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
33084   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
33085   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
33086   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
33087   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
33088   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
33089   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
33090   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
33091   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
33092   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
33093 
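  /* In the byte/word entries below the mask type tracks the element
     count, one bit per element: V64QI operands pair with a DI (64-bit)
     write-mask and V32HI operands with an SI (32-bit) one.  As a
     minimal usage sketch (assuming only the documented intrinsics from
     <immintrin.h>, compiled with -mavx512bw), the pavgb512 builtin in
     this block is roughly what the masked byte-average intrinsic
     expands to:

	#include <immintrin.h>

	__m512i
	masked_avg (__m512i src, __mmask64 k, __m512i a, __m512i b)
	{
	  // Per-byte unsigned average of A and B; bytes whose bit in K
	  // is clear keep the corresponding byte of SRC.
	  return _mm512_mask_avg_epu8 (src, k, a, b);
	}
  */
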
33094   /* AVX512BW.  */
33095   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
33096   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
33097   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask",  IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
33098   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
33099   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
33100   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask",  IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
33101   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
33102   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
33103   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
33104   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
33105   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
33106   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
33107   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
33108   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
33109   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
33110   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
33111   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
33112   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
33113   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33114   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33115   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33116   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33117   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33118   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33119   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33120   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33121   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33122   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33123   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33124   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33125   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33126   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33127   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33128   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33129   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33130   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33131   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33132   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33133   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33134   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33135   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33136   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33137   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33138   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33139   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
33140   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
33141   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
33142   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33143   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33144   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask"  , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33145   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33146   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33147   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
33148   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask",  IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
33149   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask",  IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
33150   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33151   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
33152   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
33153   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33154   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33155   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33156   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33157   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33158   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33159   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33160   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33161   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33162   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
33163   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33164   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
33165   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
33166   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
33167   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
33168   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
33169   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
33170   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
33171   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
33172   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
33173   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
33174   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
33175   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
33176   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
33177   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33178   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
33179   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
33180   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
33181   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
33182   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
33183   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
33184   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
33185   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
33186 
33187   /* AVX512IFMA */
33188   { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33189   { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33190   { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33191   { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33192   { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33193   { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33194   { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33195   { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33196   { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33197   { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33198   { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33199   { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
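/* Editor's note: the transposed "vpamdd52" spelling in the CODE_FOR_*
   names above is not a transcription error in this listing; it appears
   to match the insn pattern names in sse.md at this revision (otherwise
   the enumerators would not resolve).  The user-visible builtin names
   spell "vpmadd52" correctly.  */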
33200 
33201   /* AVX512VBMI */
33202   { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33203   { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33204   { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33205   { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33206   { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33207   { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33208   { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33209   { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33210   { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33211   { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33212   { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33213   { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33214   { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33215   { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33216   { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33217 };
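/* Editor's note: each initializer in these tables follows the
   builtin_description layout used throughout this file, roughly

     { isa_mask, icode, name, builtin_code, comparison, ftype }

   i.e. the OPTION_MASK_ISA_* bits gating the builtin, the CODE_FOR_*
   insn pattern used to expand it, its user-visible __builtin_ia32_*
   name, its IX86_BUILTIN_* enumerator, an rtx comparison code
   (UNKNOWN where unused) and the *_FTYPE_* prototype cast to int.  */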
33218 
33219 /* Builtins with rounding support.  */
33220 static const struct builtin_description bdesc_round_args[] =
33221 {
33222   /* AVX512F */
33223   { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33224   { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33225   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33226   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33227   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
33228   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
33229   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
33230   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
33231   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
33232   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
33233   { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33234   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33235   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round,  "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
33236   { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33237   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round,  "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
33238   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33239   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
33240   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33241   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
33242   { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
33243   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
33244   { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
33245   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
33246   { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33247   { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33248   { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33249   { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33250   { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33251   { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
33252   { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
33253   { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
33254   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33255   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33256   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33257   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33258   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33259   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33260   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33261   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33262   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33263   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33264   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33265   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33266   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33267   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33268   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33269   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33270   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33271   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33272   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33273   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33274   { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33275   { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33276   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33277   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33278   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33279   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33280   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33281   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33282   { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33283   { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33284   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33285   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33286   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33287   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33288   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33289   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33290   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33291   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33292   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33293   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33294   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33295   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33296   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33297   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33298   { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33299   { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33300   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33301   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33302   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33303   { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33304   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33305   { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33306   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33307   { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33308   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33309   { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33310   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33311   { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33312   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33313   { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33314   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33315   { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33316   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33317   { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33318   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33319   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33320   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33321   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33322   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33323   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33324   { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
33325   { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
33326   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33327   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33328   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33329   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33330   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33331   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33332   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33333   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33334   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33335   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33336   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33337   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33338   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33339   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33340   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33341   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
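  /* Editor's note: for the FMA entries above, the _mask/_mask3/_maskz
     triples differ only in masking semantics: _mask merges masked-off
     lanes from the first source operand, _mask3 from the third (the
     accumulator) and _maskz zeroes them.  */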
33342 
33343   /* AVX512ER */
33344   { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33345   { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33346   { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33347   { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33348   { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33349   { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33350   { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33351   { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33352   { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33353   { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
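  /* Editor's note: the AVX512ER entries wire up the approximation
     instructions (VEXP2*, VRCP28*, VRSQRT28*); as with the rest of
     this table, the trailing INT operand in each prototype is the
     embedded rounding/SAE immediate.  */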
33354 
33355   /* AVX512DQ.  */
33356   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33357   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33358   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33359   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33360   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33361   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33362   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33363   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33364   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33365   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33366   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33367   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33368   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33369   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33370   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
33371   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
33372 };
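/* Editor's note: entries in bdesc_round_args carry one operand more
   than their plain counterparts -- the final INT in each *_FTYPE_*
   code is the embedded rounding/SAE immediate.  A minimal sketch of
   how user code reaches one of them, assuming the usual <immintrin.h>
   wrappers:

     __m512d c = _mm512_add_round_pd (a, b,
		   _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);

   which the avx512fintrin.h wrapper lowers to
   __builtin_ia32_addpd512_mask with an all-ones mask and the rounding
   constant as the last argument.  */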
33373 
33374 /* Builtins for MPX.  */
33375 static const struct builtin_description bdesc_mpx[] =
33376 {
33377   { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33378   { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33379   { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33380 };
33381 
33382 /* Const builtins for MPX.  */
33383 static const struct builtin_description bdesc_mpx_const[] =
33384 {
33385   { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33386   { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33387   { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33388   { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33389   { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33390   { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33391   { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33392   { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
33393 };
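/* Editor's note: the MPX entries above deliberately use
   (enum insn_code)0 instead of a CODE_FOR_* pattern; bound-checking
   builtins are expanded by hand in ix86_expand_builtin rather than
   through a single named insn, so only the name, the IX86_BUILTIN_*
   code and the prototype are consulted here.  */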
33394 
33395 /* FMA4 and XOP.  */
33396 #define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33397 #define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33398 #define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33399 #define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
33400 #define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
33401 #define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
33402 #define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
33403 #define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
33404 #define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
33405 #define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
33406 #define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
33407 #define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
33408 #define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
33409 #define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
33410 #define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
33411 #define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
33412 #define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
33413 #define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
33414 #define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
33415 #define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
33416 #define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
33417 #define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
33418 #define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
33419 #define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
33420 #define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
33421 #define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
33422 #define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
33423 #define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
33424 #define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
33425 #define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
33426 #define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
33427 #define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
33428 #define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
33429 #define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
33430 #define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
33431 #define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
33432 #define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
33433 #define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
33434 #define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
33435 #define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
33436 #define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
33437 #define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
33438 #define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
33439 #define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
33440 #define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
33441 #define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
33442 #define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
33443 #define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
33444 #define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
33445 #define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
33446 #define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
33447 #define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
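/* Editor's note: the MULTI_ARG_<N>_<MODE> aliases above simply rename
   *_FTYPE_* prototypes for the FMA4/XOP table that follows: <N> is the
   operand count, the mode letters give the element type (SF/DF float,
   QI/HI/SI/DI integer, a trailing 2 marking the 256-bit form) and the
   optional suffix selects the extra final operand (_IMM an immediate
   count, _CMP/_TF the comparison forms used by the vpcom builtins).
   For example MULTI_ARG_2_DI_IMM is V2DI_FTYPE_V2DI_SI: two operands,
   the second a scalar immediate.  */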
33448 
33449 static const struct builtin_description bdesc_multi_arg[] =
33450 {
33451   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33452     "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33453     UNKNOWN, (int)MULTI_ARG_3_SF },
33454   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33455     "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33456     UNKNOWN, (int)MULTI_ARG_3_DF },
33457 
33458   { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33459     "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33460     UNKNOWN, (int)MULTI_ARG_3_SF },
33461   { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33462     "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33463     UNKNOWN, (int)MULTI_ARG_3_DF },
33464 
33465   { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33466     "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33467     UNKNOWN, (int)MULTI_ARG_3_SF },
33468   { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33469     "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33470     UNKNOWN, (int)MULTI_ARG_3_DF },
33471   { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33472     "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33473     UNKNOWN, (int)MULTI_ARG_3_SF2 },
33474   { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33475     "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33476     UNKNOWN, (int)MULTI_ARG_3_DF2 },
33477 
33478   { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33479     "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33480     UNKNOWN, (int)MULTI_ARG_3_SF },
33481   { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33482     "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33483     UNKNOWN, (int)MULTI_ARG_3_DF },
33484   { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33485     "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33486     UNKNOWN, (int)MULTI_ARG_3_SF2 },
33487   { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33488     "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33489     UNKNOWN, (int)MULTI_ARG_3_DF2 },
33490 
33491   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di,        "__builtin_ia32_vpcmov",      IX86_BUILTIN_VPCMOV,	 UNKNOWN,      (int)MULTI_ARG_3_DI },
33492   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di,        "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN,      (int)MULTI_ARG_3_DI },
33493   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si,        "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN,      (int)MULTI_ARG_3_SI },
33494   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi,        "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN,      (int)MULTI_ARG_3_HI },
33495   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi,       "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN,      (int)MULTI_ARG_3_QI },
33496   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df,        "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN,      (int)MULTI_ARG_3_DF },
33497   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf,        "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN,      (int)MULTI_ARG_3_SF },
33498 
33499   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256,        "__builtin_ia32_vpcmov256",       IX86_BUILTIN_VPCMOV256,       UNKNOWN,      (int)MULTI_ARG_3_DI2 },
33500   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256,        "__builtin_ia32_vpcmov_v4di256",  IX86_BUILTIN_VPCMOV_V4DI256,  UNKNOWN,      (int)MULTI_ARG_3_DI2 },
33501   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256,        "__builtin_ia32_vpcmov_v8si256",  IX86_BUILTIN_VPCMOV_V8SI256,  UNKNOWN,      (int)MULTI_ARG_3_SI2 },
33502   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256,       "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN,      (int)MULTI_ARG_3_HI2 },
33503   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256,       "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN,      (int)MULTI_ARG_3_QI2 },
33504   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256,        "__builtin_ia32_vpcmov_v4df256",  IX86_BUILTIN_VPCMOV_V4DF256,  UNKNOWN,      (int)MULTI_ARG_3_DF2 },
33505   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256,        "__builtin_ia32_vpcmov_v8sf256",  IX86_BUILTIN_VPCMOV_V8SF256,  UNKNOWN,      (int)MULTI_ARG_3_SF2 },
33506 
33507   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm,             "__builtin_ia32_vpperm",      IX86_BUILTIN_VPPERM,      UNKNOWN,      (int)MULTI_ARG_3_QI },
33508 
33509   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww,          "__builtin_ia32_vpmacssww",   IX86_BUILTIN_VPMACSSWW,   UNKNOWN,      (int)MULTI_ARG_3_HI },
33510   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww,           "__builtin_ia32_vpmacsww",    IX86_BUILTIN_VPMACSWW,    UNKNOWN,      (int)MULTI_ARG_3_HI },
33511   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd,          "__builtin_ia32_vpmacsswd",   IX86_BUILTIN_VPMACSSWD,   UNKNOWN,      (int)MULTI_ARG_3_HI_SI },
33512   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd,           "__builtin_ia32_vpmacswd",    IX86_BUILTIN_VPMACSWD,    UNKNOWN,      (int)MULTI_ARG_3_HI_SI },
33513   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd,          "__builtin_ia32_vpmacssdd",   IX86_BUILTIN_VPMACSSDD,   UNKNOWN,      (int)MULTI_ARG_3_SI },
33514   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd,           "__builtin_ia32_vpmacsdd",    IX86_BUILTIN_VPMACSDD,    UNKNOWN,      (int)MULTI_ARG_3_SI },
33515   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql,         "__builtin_ia32_vpmacssdql",  IX86_BUILTIN_VPMACSSDQL,  UNKNOWN,      (int)MULTI_ARG_3_SI_DI },
33516   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh,         "__builtin_ia32_vpmacssdqh",  IX86_BUILTIN_VPMACSSDQH,  UNKNOWN,      (int)MULTI_ARG_3_SI_DI },
33517   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql,          "__builtin_ia32_vpmacsdql",   IX86_BUILTIN_VPMACSDQL,   UNKNOWN,      (int)MULTI_ARG_3_SI_DI },
33518   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh,          "__builtin_ia32_vpmacsdqh",   IX86_BUILTIN_VPMACSDQH,   UNKNOWN,      (int)MULTI_ARG_3_SI_DI },
33519   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd,         "__builtin_ia32_vpmadcsswd",  IX86_BUILTIN_VPMADCSSWD,  UNKNOWN,      (int)MULTI_ARG_3_HI_SI },
33520   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd,          "__builtin_ia32_vpmadcswd",   IX86_BUILTIN_VPMADCSWD,   UNKNOWN,      (int)MULTI_ARG_3_HI_SI },
33521 
33522   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3,        "__builtin_ia32_vprotq",      IX86_BUILTIN_VPROTQ,      UNKNOWN,      (int)MULTI_ARG_2_DI },
33523   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3,        "__builtin_ia32_vprotd",      IX86_BUILTIN_VPROTD,      UNKNOWN,      (int)MULTI_ARG_2_SI },
33524   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3,        "__builtin_ia32_vprotw",      IX86_BUILTIN_VPROTW,      UNKNOWN,      (int)MULTI_ARG_2_HI },
33525   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3,       "__builtin_ia32_vprotb",      IX86_BUILTIN_VPROTB,      UNKNOWN,      (int)MULTI_ARG_2_QI },
33526   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3,         "__builtin_ia32_vprotqi",     IX86_BUILTIN_VPROTQ_IMM,  UNKNOWN,      (int)MULTI_ARG_2_DI_IMM },
33527   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3,         "__builtin_ia32_vprotdi",     IX86_BUILTIN_VPROTD_IMM,  UNKNOWN,      (int)MULTI_ARG_2_SI_IMM },
33528   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3,         "__builtin_ia32_vprotwi",     IX86_BUILTIN_VPROTW_IMM,  UNKNOWN,      (int)MULTI_ARG_2_HI_IMM },
33529   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3,        "__builtin_ia32_vprotbi",     IX86_BUILTIN_VPROTB_IMM,  UNKNOWN,      (int)MULTI_ARG_2_QI_IMM },
33530   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3,         "__builtin_ia32_vpshaq",      IX86_BUILTIN_VPSHAQ,      UNKNOWN,      (int)MULTI_ARG_2_DI },
33531   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3,         "__builtin_ia32_vpshad",      IX86_BUILTIN_VPSHAD,      UNKNOWN,      (int)MULTI_ARG_2_SI },
33532   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3,         "__builtin_ia32_vpshaw",      IX86_BUILTIN_VPSHAW,      UNKNOWN,      (int)MULTI_ARG_2_HI },
33533   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3,        "__builtin_ia32_vpshab",      IX86_BUILTIN_VPSHAB,      UNKNOWN,      (int)MULTI_ARG_2_QI },
33534   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3,         "__builtin_ia32_vpshlq",      IX86_BUILTIN_VPSHLQ,      UNKNOWN,      (int)MULTI_ARG_2_DI },
33535   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3,         "__builtin_ia32_vpshld",      IX86_BUILTIN_VPSHLD,      UNKNOWN,      (int)MULTI_ARG_2_SI },
33536   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3,         "__builtin_ia32_vpshlw",      IX86_BUILTIN_VPSHLW,      UNKNOWN,      (int)MULTI_ARG_2_HI },
33537   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3,        "__builtin_ia32_vpshlb",      IX86_BUILTIN_VPSHLB,      UNKNOWN,      (int)MULTI_ARG_2_QI },
33538 
33539   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2,       "__builtin_ia32_vfrczss",     IX86_BUILTIN_VFRCZSS,     UNKNOWN,      (int)MULTI_ARG_1_SF },
33540   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2,       "__builtin_ia32_vfrczsd",     IX86_BUILTIN_VFRCZSD,     UNKNOWN,      (int)MULTI_ARG_1_DF },
33541   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2,         "__builtin_ia32_vfrczps",     IX86_BUILTIN_VFRCZPS,     UNKNOWN,      (int)MULTI_ARG_1_SF },
33542   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2,         "__builtin_ia32_vfrczpd",     IX86_BUILTIN_VFRCZPD,     UNKNOWN,      (int)MULTI_ARG_1_DF },
33543   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2,         "__builtin_ia32_vfrczps256",  IX86_BUILTIN_VFRCZPS256,  UNKNOWN,      (int)MULTI_ARG_1_SF2 },
33544   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2,         "__builtin_ia32_vfrczpd256",  IX86_BUILTIN_VFRCZPD256,  UNKNOWN,      (int)MULTI_ARG_1_DF2 },
33545 
33546   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw,           "__builtin_ia32_vphaddbw",    IX86_BUILTIN_VPHADDBW,    UNKNOWN,      (int)MULTI_ARG_1_QI_HI },
33547   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd,           "__builtin_ia32_vphaddbd",    IX86_BUILTIN_VPHADDBD,    UNKNOWN,      (int)MULTI_ARG_1_QI_SI },
33548   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq,           "__builtin_ia32_vphaddbq",    IX86_BUILTIN_VPHADDBQ,    UNKNOWN,      (int)MULTI_ARG_1_QI_DI },
33549   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd,           "__builtin_ia32_vphaddwd",    IX86_BUILTIN_VPHADDWD,    UNKNOWN,      (int)MULTI_ARG_1_HI_SI },
33550   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq,           "__builtin_ia32_vphaddwq",    IX86_BUILTIN_VPHADDWQ,    UNKNOWN,      (int)MULTI_ARG_1_HI_DI },
33551   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq,           "__builtin_ia32_vphadddq",    IX86_BUILTIN_VPHADDDQ,    UNKNOWN,      (int)MULTI_ARG_1_SI_DI },
33552   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw,          "__builtin_ia32_vphaddubw",   IX86_BUILTIN_VPHADDUBW,   UNKNOWN,      (int)MULTI_ARG_1_QI_HI },
33553   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd,          "__builtin_ia32_vphaddubd",   IX86_BUILTIN_VPHADDUBD,   UNKNOWN,      (int)MULTI_ARG_1_QI_SI },
33554   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq,          "__builtin_ia32_vphaddubq",   IX86_BUILTIN_VPHADDUBQ,   UNKNOWN,      (int)MULTI_ARG_1_QI_DI },
33555   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd,          "__builtin_ia32_vphadduwd",   IX86_BUILTIN_VPHADDUWD,   UNKNOWN,      (int)MULTI_ARG_1_HI_SI },
33556   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq,          "__builtin_ia32_vphadduwq",   IX86_BUILTIN_VPHADDUWQ,   UNKNOWN,      (int)MULTI_ARG_1_HI_DI },
33557   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq,          "__builtin_ia32_vphaddudq",   IX86_BUILTIN_VPHADDUDQ,   UNKNOWN,      (int)MULTI_ARG_1_SI_DI },
33558   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw,           "__builtin_ia32_vphsubbw",    IX86_BUILTIN_VPHSUBBW,    UNKNOWN,      (int)MULTI_ARG_1_QI_HI },
33559   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd,           "__builtin_ia32_vphsubwd",    IX86_BUILTIN_VPHSUBWD,    UNKNOWN,      (int)MULTI_ARG_1_HI_SI },
33560   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq,           "__builtin_ia32_vphsubdq",    IX86_BUILTIN_VPHSUBDQ,    UNKNOWN,      (int)MULTI_ARG_1_SI_DI },
33561 
33562   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomeqb",    IX86_BUILTIN_VPCOMEQB,    EQ,           (int)MULTI_ARG_2_QI_CMP },
33563   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomneb",    IX86_BUILTIN_VPCOMNEB,    NE,           (int)MULTI_ARG_2_QI_CMP },
33564   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomneqb",   IX86_BUILTIN_VPCOMNEB,    NE,           (int)MULTI_ARG_2_QI_CMP },
33565   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomltb",    IX86_BUILTIN_VPCOMLTB,    LT,           (int)MULTI_ARG_2_QI_CMP },
33566   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomleb",    IX86_BUILTIN_VPCOMLEB,    LE,           (int)MULTI_ARG_2_QI_CMP },
33567   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomgtb",    IX86_BUILTIN_VPCOMGTB,    GT,           (int)MULTI_ARG_2_QI_CMP },
33568   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomgeb",    IX86_BUILTIN_VPCOMGEB,    GE,           (int)MULTI_ARG_2_QI_CMP },
33569 
33570   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomeqw",    IX86_BUILTIN_VPCOMEQW,    EQ,           (int)MULTI_ARG_2_HI_CMP },
33571   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomnew",    IX86_BUILTIN_VPCOMNEW,    NE,           (int)MULTI_ARG_2_HI_CMP },
33572   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomneqw",   IX86_BUILTIN_VPCOMNEW,    NE,           (int)MULTI_ARG_2_HI_CMP },
33573   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomltw",    IX86_BUILTIN_VPCOMLTW,    LT,           (int)MULTI_ARG_2_HI_CMP },
33574   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomlew",    IX86_BUILTIN_VPCOMLEW,    LE,           (int)MULTI_ARG_2_HI_CMP },
33575   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomgtw",    IX86_BUILTIN_VPCOMGTW,    GT,           (int)MULTI_ARG_2_HI_CMP },
33576   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomgew",    IX86_BUILTIN_VPCOMGEW,    GE,           (int)MULTI_ARG_2_HI_CMP },
33577 
33578   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomeqd",    IX86_BUILTIN_VPCOMEQD,    EQ,           (int)MULTI_ARG_2_SI_CMP },
33579   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomned",    IX86_BUILTIN_VPCOMNED,    NE,           (int)MULTI_ARG_2_SI_CMP },
33580   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomneqd",   IX86_BUILTIN_VPCOMNED,    NE,           (int)MULTI_ARG_2_SI_CMP },
33581   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomltd",    IX86_BUILTIN_VPCOMLTD,    LT,           (int)MULTI_ARG_2_SI_CMP },
33582   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomled",    IX86_BUILTIN_VPCOMLED,    LE,           (int)MULTI_ARG_2_SI_CMP },
33583   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomgtd",    IX86_BUILTIN_VPCOMGTD,    GT,           (int)MULTI_ARG_2_SI_CMP },
33584   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomged",    IX86_BUILTIN_VPCOMGED,    GE,           (int)MULTI_ARG_2_SI_CMP },
33585 
33586   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomeqq",    IX86_BUILTIN_VPCOMEQQ,    EQ,           (int)MULTI_ARG_2_DI_CMP },
33587   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomneq",    IX86_BUILTIN_VPCOMNEQ,    NE,           (int)MULTI_ARG_2_DI_CMP },
33588   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomneqq",   IX86_BUILTIN_VPCOMNEQ,    NE,           (int)MULTI_ARG_2_DI_CMP },
33589   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomltq",    IX86_BUILTIN_VPCOMLTQ,    LT,           (int)MULTI_ARG_2_DI_CMP },
33590   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomleq",    IX86_BUILTIN_VPCOMLEQ,    LE,           (int)MULTI_ARG_2_DI_CMP },
33591   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomgtq",    IX86_BUILTIN_VPCOMGTQ,    GT,           (int)MULTI_ARG_2_DI_CMP },
33592   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomgeq",    IX86_BUILTIN_VPCOMGEQ,    GE,           (int)MULTI_ARG_2_DI_CMP },
33593 
33594   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb",   IX86_BUILTIN_VPCOMEQUB,   EQ,           (int)MULTI_ARG_2_QI_CMP },
33595   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub",   IX86_BUILTIN_VPCOMNEUB,   NE,           (int)MULTI_ARG_2_QI_CMP },
33596   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb",  IX86_BUILTIN_VPCOMNEUB,   NE,           (int)MULTI_ARG_2_QI_CMP },
33597   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub",   IX86_BUILTIN_VPCOMLTUB,   LTU,          (int)MULTI_ARG_2_QI_CMP },
33598   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub",   IX86_BUILTIN_VPCOMLEUB,   LEU,          (int)MULTI_ARG_2_QI_CMP },
33599   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub",   IX86_BUILTIN_VPCOMGTUB,   GTU,          (int)MULTI_ARG_2_QI_CMP },
33600   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub",   IX86_BUILTIN_VPCOMGEUB,   GEU,          (int)MULTI_ARG_2_QI_CMP },
33601 
33602   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw",   IX86_BUILTIN_VPCOMEQUW,   EQ,           (int)MULTI_ARG_2_HI_CMP },
33603   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw",   IX86_BUILTIN_VPCOMNEUW,   NE,           (int)MULTI_ARG_2_HI_CMP },
33604   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw",  IX86_BUILTIN_VPCOMNEUW,   NE,           (int)MULTI_ARG_2_HI_CMP },
33605   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3,  "__builtin_ia32_vpcomltuw",   IX86_BUILTIN_VPCOMLTUW,   LTU,          (int)MULTI_ARG_2_HI_CMP },
33606   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3,  "__builtin_ia32_vpcomleuw",   IX86_BUILTIN_VPCOMLEUW,   LEU,          (int)MULTI_ARG_2_HI_CMP },
33607   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3,  "__builtin_ia32_vpcomgtuw",   IX86_BUILTIN_VPCOMGTUW,   GTU,          (int)MULTI_ARG_2_HI_CMP },
33608   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3,  "__builtin_ia32_vpcomgeuw",   IX86_BUILTIN_VPCOMGEUW,   GEU,          (int)MULTI_ARG_2_HI_CMP },
33609 
33610   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd",   IX86_BUILTIN_VPCOMEQUD,   EQ,           (int)MULTI_ARG_2_SI_CMP },
33611   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud",   IX86_BUILTIN_VPCOMNEUD,   NE,           (int)MULTI_ARG_2_SI_CMP },
33612   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd",  IX86_BUILTIN_VPCOMNEUD,   NE,           (int)MULTI_ARG_2_SI_CMP },
33613   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3,  "__builtin_ia32_vpcomltud",   IX86_BUILTIN_VPCOMLTUD,   LTU,          (int)MULTI_ARG_2_SI_CMP },
33614   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3,  "__builtin_ia32_vpcomleud",   IX86_BUILTIN_VPCOMLEUD,   LEU,          (int)MULTI_ARG_2_SI_CMP },
33615   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3,  "__builtin_ia32_vpcomgtud",   IX86_BUILTIN_VPCOMGTUD,   GTU,          (int)MULTI_ARG_2_SI_CMP },
33616   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3,  "__builtin_ia32_vpcomgeud",   IX86_BUILTIN_VPCOMGEUD,   GEU,          (int)MULTI_ARG_2_SI_CMP },
33617 
33618   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq",   IX86_BUILTIN_VPCOMEQUQ,   EQ,           (int)MULTI_ARG_2_DI_CMP },
33619   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq",   IX86_BUILTIN_VPCOMNEUQ,   NE,           (int)MULTI_ARG_2_DI_CMP },
33620   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq",  IX86_BUILTIN_VPCOMNEUQ,   NE,           (int)MULTI_ARG_2_DI_CMP },
33621   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3,  "__builtin_ia32_vpcomltuq",   IX86_BUILTIN_VPCOMLTUQ,   LTU,          (int)MULTI_ARG_2_DI_CMP },
33622   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3,  "__builtin_ia32_vpcomleuq",   IX86_BUILTIN_VPCOMLEUQ,   LEU,          (int)MULTI_ARG_2_DI_CMP },
33623   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3,  "__builtin_ia32_vpcomgtuq",   IX86_BUILTIN_VPCOMGTUQ,   GTU,          (int)MULTI_ARG_2_DI_CMP },
33624   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3,  "__builtin_ia32_vpcomgeuq",   IX86_BUILTIN_VPCOMGEUQ,   GEU,          (int)MULTI_ARG_2_DI_CMP },
33625 
33626   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3,     "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_QI_TF },
33627   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3,      "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_HI_TF },
33628   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3,      "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_SI_TF },
33629   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3,      "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_DI_TF },
33630   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3,     "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_QI_TF },
33631   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3,      "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_HI_TF },
33632   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3,      "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_SI_TF },
33633   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3,      "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_DI_TF },
33634 
33635   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3,     "__builtin_ia32_vpcomtrueb",  IX86_BUILTIN_VPCOMTRUEB,  (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_QI_TF },
33636   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3,      "__builtin_ia32_vpcomtruew",  IX86_BUILTIN_VPCOMTRUEW,  (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_HI_TF },
33637   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3,      "__builtin_ia32_vpcomtrued",  IX86_BUILTIN_VPCOMTRUED,  (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_SI_TF },
33638   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3,      "__builtin_ia32_vpcomtrueq",  IX86_BUILTIN_VPCOMTRUEQ,  (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_DI_TF },
33639   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3,     "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_QI_TF },
33640   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3,      "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_HI_TF },
33641   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3,      "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_SI_TF },
33642   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3,      "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_DI_TF },
33643 
33644   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3,     "__builtin_ia32_vpermil2pd",  IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33645   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3,     "__builtin_ia32_vpermil2ps",  IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33646   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3,     "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33647   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3,     "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
33648 
33649 };
33650 
33651 /* TM vector builtins.  */
33652 
33653 /* Reuse the existing x86-specific `struct builtin_description' because
33654    we're lazy.  Add casts to make them fit.  */
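/* The builtin names below follow the libitm ABI: W = write, R = read,
   L = log, with aR/aW = "after read"/"after write" and fW = "for write"
   variants; the M64/M128/M256 suffix is the access width in bits.  */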
33655 static const struct builtin_description bdesc_tm[] =
33656 {
33657   { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33658   { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33659   { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33660   { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33661   { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33662   { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33663   { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33664 
33665   { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33666   { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33667   { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33668   { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33669   { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33670   { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33671   { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33672 
33673   { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33674   { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33675   { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33676   { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33677   { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33678   { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33679   { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33680 
33681   { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33682   { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33683   { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
33684 };
33685 
33686 /* TM callbacks.  */
33687 
33688 /* Return the builtin decl needed to load a vector of TYPE.  */
33689 
33690 static tree
33691 ix86_builtin_tm_load (tree type)
33692 {
33693   if (TREE_CODE (type) == VECTOR_TYPE)
33694     {
33695       switch (tree_to_uhwi (TYPE_SIZE (type)))
33696 	{
33697 	case 64:
33698 	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33699 	case 128:
33700 	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33701 	case 256:
33702 	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33703 	}
33704     }
33705   return NULL_TREE;
33706 }
33707 
33708 /* Return the builtin decl needed to store a vector of TYPE.  */
33709 
33710 static tree
33711 ix86_builtin_tm_store (tree type)
33712 {
33713   if (TREE_CODE (type) == VECTOR_TYPE)
33714     {
33715       switch (tree_to_uhwi (TYPE_SIZE (type)))
33716 	{
33717 	case 64:
33718 	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33719 	case 128:
33720 	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33721 	case 256:
33722 	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33723 	}
33724     }
33725   return NULL_TREE;
33726 }
33727 
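/* For illustration only (a sketch, not code from this file): given a
   vector store inside a transaction, e.g.

     __transaction_atomic { *p = v; }      -- v has a 128-bit vector type

   the trans-mem lowering can use the decl returned by
   ix86_builtin_tm_store above to emit __builtin__ITM_WM128 (p, v)
   in place of the plain store.  */
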
33728 /* Initialize the transactional memory vector load/store builtins.  */
33729 
33730 static void
33731 ix86_init_tm_builtins (void)
33732 {
33733   enum ix86_builtin_func_type ftype;
33734   const struct builtin_description *d;
33735   size_t i;
33736   tree decl;
33737   tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33738   tree attrs_log, attrs_type_log;
33739 
33740   if (!flag_tm)
33741     return;
33742 
33743   /* If there are no builtins defined, we must be compiling in a
33744      language without trans-mem support.  */
33745   if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33746     return;
33747 
33748   /* Use whatever attributes a normal TM load has.  */
33749   decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33750   attrs_load = DECL_ATTRIBUTES (decl);
33751   attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33752   /* Use whatever attributes a normal TM store has.  */
33753   decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33754   attrs_store = DECL_ATTRIBUTES (decl);
33755   attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33756   /* Use whatever attributes a normal TM log has.  */
33757   decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33758   attrs_log = DECL_ATTRIBUTES (decl);
33759   attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33760 
33761   for (i = 0, d = bdesc_tm;
33762        i < ARRAY_SIZE (bdesc_tm);
33763        i++, d++)
33764     {
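      /* Create the builtin if its ISA is currently enabled, or
	 unconditionally when the front end registers builtins in the
	 external scope (the ISA check then happens at use time).  */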
33765       if ((d->mask & ix86_isa_flags) != 0
33766 	  || (lang_hooks.builtin_function
33767 	      == lang_hooks.builtin_function_ext_scope))
33768 	{
33769 	  tree type, attrs, attrs_type;
33770 	  enum built_in_function code = (enum built_in_function) d->code;
33771 
33772 	  ftype = (enum ix86_builtin_func_type) d->flag;
33773 	  type = ix86_get_builtin_func_type (ftype);
33774 
33775 	  if (BUILTIN_TM_LOAD_P (code))
33776 	    {
33777 	      attrs = attrs_load;
33778 	      attrs_type = attrs_type_load;
33779 	    }
33780 	  else if (BUILTIN_TM_STORE_P (code))
33781 	    {
33782 	      attrs = attrs_store;
33783 	      attrs_type = attrs_type_store;
33784 	    }
33785 	  else
33786 	    {
33787 	      attrs = attrs_log;
33788 	      attrs_type = attrs_type_log;
33789 	    }
33790 	  decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33791 				       /* The builtin name without the prefix,
33792 					  for calling it directly.  */
33793 				       d->name + strlen ("__builtin_"),
33794 				       attrs);
33795 	  /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33796 	     set the TYPE_ATTRIBUTES.  */
33797 	  decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33798 
33799 	  set_builtin_decl (code, decl, false);
33800 	}
33801     }
33802 }
33803 
33804 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
33805    not in the current target ISA, to allow the user to compile particular
33806    modules with target-specific options that differ from the command-line
33807    options.  */
33808 static void
33809 ix86_init_mmx_sse_builtins (void)
33810 {
33811   const struct builtin_description * d;
33812   enum ix86_builtin_func_type ftype;
33813   size_t i;
33814 
33815   /* Add all special builtins with a variable number of operands.  */
33816   for (i = 0, d = bdesc_special_args;
33817        i < ARRAY_SIZE (bdesc_special_args);
33818        i++, d++)
33819     {
33820       if (d->name == 0)
33821 	continue;
33822 
33823       ftype = (enum ix86_builtin_func_type) d->flag;
33824       def_builtin (d->mask, d->name, ftype, d->code);
33825     }
33826 
33827   /* Add all builtins with a variable number of operands.  */
33828   for (i = 0, d = bdesc_args;
33829        i < ARRAY_SIZE (bdesc_args);
33830        i++, d++)
33831     {
33832       if (d->name == 0)
33833 	continue;
33834 
33835       ftype = (enum ix86_builtin_func_type) d->flag;
33836       def_builtin_const (d->mask, d->name, ftype, d->code);
33837     }
33838 
33839   /* Add all builtins with rounding.  */
33840   for (i = 0, d = bdesc_round_args;
33841        i < ARRAY_SIZE (bdesc_round_args);
33842        i++, d++)
33843     {
33844       if (d->name == 0)
33845 	continue;
33846 
33847       ftype = (enum ix86_builtin_func_type) d->flag;
33848       def_builtin_const (d->mask, d->name, ftype, d->code);
33849     }
33850 
33851   /* pcmpestr[im] insns.  */
33852   for (i = 0, d = bdesc_pcmpestr;
33853        i < ARRAY_SIZE (bdesc_pcmpestr);
33854        i++, d++)
33855     {
33856       if (d->code == IX86_BUILTIN_PCMPESTRM128)
33857 	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33858       else
33859 	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33860       def_builtin_const (d->mask, d->name, ftype, d->code);
33861     }
33862 
33863   /* pcmpistr[im] insns.  */
33864   for (i = 0, d = bdesc_pcmpistr;
33865        i < ARRAY_SIZE (bdesc_pcmpistr);
33866        i++, d++)
33867     {
33868       if (d->code == IX86_BUILTIN_PCMPISTRM128)
33869 	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33870       else
33871 	ftype = INT_FTYPE_V16QI_V16QI_INT;
33872       def_builtin_const (d->mask, d->name, ftype, d->code);
33873     }
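  /* In both string-compare loops above, the *m forms return the match
     mask as a vector while the *i forms return an index, hence the two
     different function types.  */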
33874 
33875   /* comi/ucomi insns.  */
33876   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33877     {
33878       if (d->mask == OPTION_MASK_ISA_SSE2)
33879 	ftype = INT_FTYPE_V2DF_V2DF;
33880       else
33881 	ftype = INT_FTYPE_V4SF_V4SF;
33882       def_builtin_const (d->mask, d->name, ftype, d->code);
33883     }
33884 
33885   /* SSE */
33886   def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33887 	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33888   def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33889 	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33890 
33891   /* SSE or 3DNow!A */
33892   def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33893 	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33894 	       IX86_BUILTIN_MASKMOVQ);
33895 
33896   /* SSE2 */
33897   def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33898 	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33899 
33900   def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33901 	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33902   x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33903 			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33904 
33905   /* SSE3.  */
33906   def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33907 	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33908   def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33909 	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33910 
33911   /* AES */
33912   def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33913 		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33914   def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33915 		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33916   def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33917 		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33918   def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33919 		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33920   def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33921 		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33922   def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33923 		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33924 
33925   /* PCLMUL */
33926   def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33927 		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33928 
33929   /* RDRND */
33930   def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33931 	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33932   def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33933 	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33934   def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33935 	       "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33936 	       IX86_BUILTIN_RDRAND64_STEP);
33937 
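  /* A sketch of the intended use of the rdrand step builtins: each
     returns nonzero on success and stores the random value through its
     pointer argument, so callers typically loop, e.g.

       unsigned int val;
       while (!__builtin_ia32_rdrand32_step (&val))
	 continue;   -- retry until the hardware RNG delivers  */
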
33938   /* AVX2 */
33939   def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33940 	       V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33941 	       IX86_BUILTIN_GATHERSIV2DF);
33942 
33943   def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33944 	       V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33945 	       IX86_BUILTIN_GATHERSIV4DF);
33946 
33947   def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33948 	       V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33949 	       IX86_BUILTIN_GATHERDIV2DF);
33950 
33951   def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33952 	       V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33953 	       IX86_BUILTIN_GATHERDIV4DF);
33954 
33955   def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33956 	       V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33957 	       IX86_BUILTIN_GATHERSIV4SF);
33958 
33959   def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33960 	       V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33961 	       IX86_BUILTIN_GATHERSIV8SF);
33962 
33963   def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33964 	       V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33965 	       IX86_BUILTIN_GATHERDIV4SF);
33966 
33967   def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33968 	       V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33969 	       IX86_BUILTIN_GATHERDIV8SF);
33970 
33971   def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33972 	       V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33973 	       IX86_BUILTIN_GATHERSIV2DI);
33974 
33975   def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33976 	       V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33977 	       IX86_BUILTIN_GATHERSIV4DI);
33978 
33979   def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33980 	       V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33981 	       IX86_BUILTIN_GATHERDIV2DI);
33982 
33983   def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33984 	       V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33985 	       IX86_BUILTIN_GATHERDIV4DI);
33986 
33987   def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33988 	       V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33989 	       IX86_BUILTIN_GATHERSIV4SI);
33990 
33991   def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33992 	       V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33993 	       IX86_BUILTIN_GATHERSIV8SI);
33994 
33995   def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33996 	       V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33997 	       IX86_BUILTIN_GATHERDIV4SI);
33998 
33999   def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
34000 	       V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
34001 	       IX86_BUILTIN_GATHERDIV8SI);
34002 
34003   def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
34004 	       V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
34005 	       IX86_BUILTIN_GATHERALTSIV4DF);
34006 
34007   def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
34008 	       V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
34009 	       IX86_BUILTIN_GATHERALTDIV8SF);
34010 
34011   def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
34012 	       V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
34013 	       IX86_BUILTIN_GATHERALTSIV4DI);
34014 
34015   def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
34016 	       V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
34017 	       IX86_BUILTIN_GATHERALTDIV8SI);
34018 
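  /* A sketch of the gather builtins' operand order, per the function
     types above (e.g. V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT): the
     arguments are (src, base, index, mask, scale), as in

       r = __builtin_ia32_gathersiv2df (src, base, idx, mask, 2);

     which loads element i from base + idx[i] * 2 where the mask lane
     is set, and takes it from src otherwise.  */
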
34019   /* AVX512F */
34020   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
34021 	       V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
34022 	       IX86_BUILTIN_GATHER3SIV16SF);
34023 
34024   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
34025 	       V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
34026 	       IX86_BUILTIN_GATHER3SIV8DF);
34027 
34028   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
34029 	       V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
34030 	       IX86_BUILTIN_GATHER3DIV16SF);
34031 
34032   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
34033 	       V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
34034 	       IX86_BUILTIN_GATHER3DIV8DF);
34035 
34036   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
34037 	       V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
34038 	       IX86_BUILTIN_GATHER3SIV16SI);
34039 
34040   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
34041 	       V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
34042 	       IX86_BUILTIN_GATHER3SIV8DI);
34043 
34044   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
34045 	       V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
34046 	       IX86_BUILTIN_GATHER3DIV16SI);
34047 
34048   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
34049 	       V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
34050 	       IX86_BUILTIN_GATHER3DIV8DI);
34051 
34052   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
34053 	       V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
34054 	       IX86_BUILTIN_GATHER3ALTSIV8DF);
34055 
34056   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
34057 	       V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
34058 	       IX86_BUILTIN_GATHER3ALTDIV16SF);
34059 
34060   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
34061 	       V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
34062 	       IX86_BUILTIN_GATHER3ALTSIV8DI);
34063 
34064   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
34065 	       V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
34066 	       IX86_BUILTIN_GATHER3ALTDIV16SI);
34067 
34068   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
34069 	       VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
34070 	       IX86_BUILTIN_SCATTERSIV16SF);
34071 
34072   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
34073 	       VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
34074 	       IX86_BUILTIN_SCATTERSIV8DF);
34075 
34076   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
34077 	       VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
34078 	       IX86_BUILTIN_SCATTERDIV16SF);
34079 
34080   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
34081 	       VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
34082 	       IX86_BUILTIN_SCATTERDIV8DF);
34083 
34084   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
34085 	       VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
34086 	       IX86_BUILTIN_SCATTERSIV16SI);
34087 
34088   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
34089 	       VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
34090 	       IX86_BUILTIN_SCATTERSIV8DI);
34091 
34092   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
34093 	       VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
34094 	       IX86_BUILTIN_SCATTERDIV16SI);
34095 
34096   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
34097 	       VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
34098 	       IX86_BUILTIN_SCATTERDIV8DI);
34099 
34100   /* AVX512VL */
34101   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
34102 	       V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
34103 	       IX86_BUILTIN_GATHER3SIV2DF);
34104 
34105   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
34106 	       V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
34107 	       IX86_BUILTIN_GATHER3SIV4DF);
34108 
34109   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
34110 	       V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
34111 	       IX86_BUILTIN_GATHER3DIV2DF);
34112 
34113   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
34114 	       V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
34115 	       IX86_BUILTIN_GATHER3DIV4DF);
34116 
34117   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
34118 	       V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
34119 	       IX86_BUILTIN_GATHER3SIV4SF);
34120 
34121   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
34122 	       V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
34123 	       IX86_BUILTIN_GATHER3SIV8SF);
34124 
34125   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
34126 	       V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
34127 	       IX86_BUILTIN_GATHER3DIV4SF);
34128 
34129   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
34130 	       V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
34131 	       IX86_BUILTIN_GATHER3DIV8SF);
34132 
34133   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
34134 	       V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
34135 	       IX86_BUILTIN_GATHER3SIV2DI);
34136 
34137   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
34138 	       V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
34139 	       IX86_BUILTIN_GATHER3SIV4DI);
34140 
34141   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
34142 	       V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
34143 	       IX86_BUILTIN_GATHER3DIV2DI);
34144 
34145   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
34146 	       V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
34147 	       IX86_BUILTIN_GATHER3DIV4DI);
34148 
34149   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
34150 	       V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
34151 	       IX86_BUILTIN_GATHER3SIV4SI);
34152 
34153   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
34154 	       V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
34155 	       IX86_BUILTIN_GATHER3SIV8SI);
34156 
34157   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
34158 	       V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
34159 	       IX86_BUILTIN_GATHER3DIV4SI);
34160 
34161   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
34162 	       V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
34163 	       IX86_BUILTIN_GATHER3DIV8SI);
34164 
34165   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
34166 	       V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
34167 	       IX86_BUILTIN_GATHER3ALTSIV4DF);
34168 
34169   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
34170 	       V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
34171 	       IX86_BUILTIN_GATHER3ALTDIV8SF);
34172 
34173   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
34174 	       V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
34175 	       IX86_BUILTIN_GATHER3ALTSIV4DI);
34176 
34177   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
34178 	       V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
34179 	       IX86_BUILTIN_GATHER3ALTDIV8SI);
34180 
34181   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
34182 	       VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
34183 	       IX86_BUILTIN_SCATTERSIV8SF);
34184 
34185   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
34186 	       VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
34187 	       IX86_BUILTIN_SCATTERSIV4SF);
34188 
34189   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
34190 	       VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
34191 	       IX86_BUILTIN_SCATTERSIV4DF);
34192 
34193   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
34194 	       VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
34195 	       IX86_BUILTIN_SCATTERSIV2DF);
34196 
34197   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
34198 	       VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
34199 	       IX86_BUILTIN_SCATTERDIV8SF);
34200 
34201   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
34202 	       VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
34203 	       IX86_BUILTIN_SCATTERDIV4SF);
34204 
34205   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
34206 	       VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
34207 	       IX86_BUILTIN_SCATTERDIV4DF);
34208 
34209   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
34210 	       VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
34211 	       IX86_BUILTIN_SCATTERDIV2DF);
34212 
34213   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
34214 	       VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
34215 	       IX86_BUILTIN_SCATTERSIV8SI);
34216 
34217   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
34218 	       VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
34219 	       IX86_BUILTIN_SCATTERSIV4SI);
34220 
34221   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
34222 	       VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
34223 	       IX86_BUILTIN_SCATTERSIV4DI);
34224 
34225   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
34226 	       VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
34227 	       IX86_BUILTIN_SCATTERSIV2DI);
34228 
34229   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
34230 	       VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
34231 	       IX86_BUILTIN_SCATTERDIV8SI);
34232 
34233   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
34234 	       VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
34235 	       IX86_BUILTIN_SCATTERDIV4SI);
34236 
34237   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
34238 	       VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
34239 	       IX86_BUILTIN_SCATTERDIV4DI);
34240 
34241   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
34242 	       VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
34243 	       IX86_BUILTIN_SCATTERDIV2DI);
34244 
34245   /* AVX512PF */
34246   def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
34247 	       VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34248 	       IX86_BUILTIN_GATHERPFDPD);
34249   def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
34250 	       VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34251 	       IX86_BUILTIN_GATHERPFDPS);
34252   def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
34253 	       VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34254 	       IX86_BUILTIN_GATHERPFQPD);
34255   def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
34256 	       VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34257 	       IX86_BUILTIN_GATHERPFQPS);
34258   def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
34259 	       VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34260 	       IX86_BUILTIN_SCATTERPFDPD);
34261   def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
34262 	       VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34263 	       IX86_BUILTIN_SCATTERPFDPS);
34264   def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
34265 	       VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34266 	       IX86_BUILTIN_SCATTERPFQPD);
34267   def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
34268 	       VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34269 	       IX86_BUILTIN_SCATTERPFQPS);
34270 
34271   /* SHA */
34272   def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
34273 		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
34274   def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
34275 		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
34276   def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
34277 		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
34278   def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
34279 		     V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
34280   def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
34281 		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
34282   def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
34283 		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
34284   def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
34285 		     V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
34286 
34287   /* RTM.  */
34288   def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
34289 	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
34290 
34291   /* MMX access to the vec_init patterns.  */
34292   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
34293 		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
34294 
34295   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
34296 		     V4HI_FTYPE_HI_HI_HI_HI,
34297 		     IX86_BUILTIN_VEC_INIT_V4HI);
34298 
34299   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
34300 		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
34301 		     IX86_BUILTIN_VEC_INIT_V8QI);
34302 
34303   /* Access to the vec_extract patterns.  */
34304   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
34305 		     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
34306   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
34307 		     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
34308   def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
34309 		     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
34310   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
34311 		     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
34312   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
34313 		     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
34314 
34315   def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34316 		     "__builtin_ia32_vec_ext_v4hi",
34317 		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
34318 
34319   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
34320 		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
34321 
34322   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
34323 		     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
34324 
34325   /* Access to the vec_set patterns.  */
34326   def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
34327 		     "__builtin_ia32_vec_set_v2di",
34328 		     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
34329 
34330   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
34331 		     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
34332 
34333   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
34334 		     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
34335 
34336   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
34337 		     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
34338 
34339   def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34340 		     "__builtin_ia32_vec_set_v4hi",
34341 		     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
34342 
34343   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
34344 		     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
34345 
34346   /* RDSEED */
34347   def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
34348 	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
34349   def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
34350 	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
34351   def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
34352 	       "__builtin_ia32_rdseed_di_step",
34353 	       INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
34354 
34355   /* ADCX */
34356   def_builtin (0, "__builtin_ia32_addcarryx_u32",
34357 	       UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
34358   def_builtin (OPTION_MASK_ISA_64BIT,
34359 	       "__builtin_ia32_addcarryx_u64",
34360 	       UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34361 	       IX86_BUILTIN_ADDCARRYX64);
34362 
34363   /* SBB */
34364   def_builtin (0, "__builtin_ia32_sbb_u32",
34365 	       UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
34366   def_builtin (OPTION_MASK_ISA_64BIT,
34367 	       "__builtin_ia32_sbb_u64",
34368 	       UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34369 	       IX86_BUILTIN_SBB64);
34370 
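  /* A sketch of the carry-chain contract shared by the ADCX and SBB
     builtins, per UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED above: carry-in
     first, result through the pointer, carry-out as the return value,
     so multi-word operations chain naturally:

       unsigned int lo, hi;
       unsigned char c = __builtin_ia32_addcarryx_u32 (0, a0, b0, &lo);
       c = __builtin_ia32_addcarryx_u32 (c, a1, b1, &hi);  */
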
34371   /* Read/write FLAGS.  */
34372   def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34373                UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34374   def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34375                UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34376   def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34377                VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34378   def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34379                VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
34380 
34381   /* CLFLUSHOPT.  */
34382   def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34383 	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34384 
34385   /* CLWB.  */
34386   def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34387 	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34388 
34389   /* MONITORX and MWAITX.  */
34390   def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
34391 	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
34392   def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
34393 	       VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
34394 
34395   /* Add the FMA4 and XOP multi-argument instructions.  */
34396   for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34397     {
34398       if (d->name == 0)
34399 	continue;
34400 
34401       ftype = (enum ix86_builtin_func_type) d->flag;
34402       def_builtin_const (d->mask, d->name, ftype, d->code);
34403     }
34404 }
34405 
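/* Set up the MPX (Memory Protection Extensions) builtins described in
   bdesc_mpx and bdesc_mpx_const.  */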
34406 static void
34407 ix86_init_mpx_builtins ()
34408 {
34409   const struct builtin_description * d;
34410   enum ix86_builtin_func_type ftype;
34411   tree decl;
34412   size_t i;
34413 
34414   for (i = 0, d = bdesc_mpx;
34415        i < ARRAY_SIZE (bdesc_mpx);
34416        i++, d++)
34417     {
34418       if (d->name == 0)
34419 	continue;
34420 
34421       ftype = (enum ix86_builtin_func_type) d->flag;
34422       decl = def_builtin (d->mask, d->name, ftype, d->code);
34423 
34424       /* Without the leaf and nothrow flags, calls to MPX
34425 	 builtins may grow abnormal edges when setjmp is present
34426 	 in the function.  Since there may be many MPX builtin
34427 	 calls, this causes lots of useless edges and enormous
34428 	 PHI nodes.  To avoid this, mark the MPX builtins as
34429 	 leaf and nothrow.  */
34430       if (decl)
34431 	{
34432 	  DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34433 						    NULL_TREE);
34434 	  TREE_NOTHROW (decl) = 1;
34435 	}
34436       else
34437 	{
34438 	  ix86_builtins_isa[(int)d->code].leaf_p = true;
34439 	  ix86_builtins_isa[(int)d->code].nothrow_p = true;
34440 	}
34441     }
34442 
34443   for (i = 0, d = bdesc_mpx_const;
34444        i < ARRAY_SIZE (bdesc_mpx_const);
34445        i++, d++)
34446     {
34447       if (d->name == 0)
34448 	continue;
34449 
34450       ftype = (enum ix86_builtin_func_type) d->flag;
34451       decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34452 
34453       if (decl)
34454 	{
34455 	  DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34456 						    NULL_TREE);
34457 	  TREE_NOTHROW (decl) = 1;
34458 	}
34459       else
34460 	{
34461 	  ix86_builtins_isa[(int)d->code].leaf_p = true;
34462 	  ix86_builtins_isa[(int)d->code].nothrow_p = true;
34463 	}
34464     }
34465 }
34466 
34467 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34468    to return a pointer to VERSION_DECL if the outcome of the expression
34469    formed by PREDICATE_CHAIN is true.  This function will be called during
34470    version dispatch to decide which function version to execute.  It returns
34471    the basic block at the end, to which more conditions can be added.  */
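/* Schematically, the CFG this function produces is:

     NEW_BB: predicate calls; if (and_expr_var > 0) ...
       true edge  -> BB2: return (void *) version_decl;  -> EXIT
       false edge -> BB3  (returned, so further conditions chain here)  */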
34472 
34473 static basic_block
34474 add_condition_to_bb (tree function_decl, tree version_decl,
34475 		     tree predicate_chain, basic_block new_bb)
34476 {
34477   gimple return_stmt;
34478   tree convert_expr, result_var;
34479   gimple convert_stmt;
34480   gimple call_cond_stmt;
34481   gimple if_else_stmt;
34482 
34483   basic_block bb1, bb2, bb3;
34484   edge e12, e23;
34485 
34486   tree cond_var, and_expr_var = NULL_TREE;
34487   gimple_seq gseq;
34488 
34489   tree predicate_decl, predicate_arg;
34490 
34491   push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34492 
34493   gcc_assert (new_bb != NULL);
34494   gseq = bb_seq (new_bb);
34495 
34496 
34497   convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34498 	     		 build_fold_addr_expr (version_decl));
34499   result_var = create_tmp_var (ptr_type_node);
34500   convert_stmt = gimple_build_assign (result_var, convert_expr);
34501   return_stmt = gimple_build_return (result_var);
34502 
34503   if (predicate_chain == NULL_TREE)
34504     {
34505       gimple_seq_add_stmt (&gseq, convert_stmt);
34506       gimple_seq_add_stmt (&gseq, return_stmt);
34507       set_bb_seq (new_bb, gseq);
34508       gimple_set_bb (convert_stmt, new_bb);
34509       gimple_set_bb (return_stmt, new_bb);
34510       pop_cfun ();
34511       return new_bb;
34512     }
34513 
34514   while (predicate_chain != NULL)
34515     {
34516       cond_var = create_tmp_var (integer_type_node);
34517       predicate_decl = TREE_PURPOSE (predicate_chain);
34518       predicate_arg = TREE_VALUE (predicate_chain);
34519       call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34520       gimple_call_set_lhs (call_cond_stmt, cond_var);
34521 
34522       gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34523       gimple_set_bb (call_cond_stmt, new_bb);
34524       gimple_seq_add_stmt (&gseq, call_cond_stmt);
34525 
34526       predicate_chain = TREE_CHAIN (predicate_chain);
34527 
34528       if (and_expr_var == NULL)
34529         and_expr_var = cond_var;
34530       else
34531 	{
34532 	  gimple assign_stmt;
	  /* Use MIN_EXPR to check if any integer is zero:
	     and_expr_var = min_expr <cond_var, and_expr_var>  */
34535 	  assign_stmt = gimple_build_assign (and_expr_var,
34536 			  build2 (MIN_EXPR, integer_type_node,
34537 				  cond_var, and_expr_var));
34538 
34539 	  gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34540 	  gimple_set_bb (assign_stmt, new_bb);
34541 	  gimple_seq_add_stmt (&gseq, assign_stmt);
34542 	}
34543     }
34544 
34545   if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
				    integer_zero_node,
34547 				    NULL_TREE, NULL_TREE);
34548   gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34549   gimple_set_bb (if_else_stmt, new_bb);
34550   gimple_seq_add_stmt (&gseq, if_else_stmt);
34551 
34552   gimple_seq_add_stmt (&gseq, convert_stmt);
34553   gimple_seq_add_stmt (&gseq, return_stmt);
34554   set_bb_seq (new_bb, gseq);
34555 
34556   bb1 = new_bb;
34557   e12 = split_block (bb1, if_else_stmt);
34558   bb2 = e12->dest;
34559   e12->flags &= ~EDGE_FALLTHRU;
34560   e12->flags |= EDGE_TRUE_VALUE;
34561 
34562   e23 = split_block (bb2, return_stmt);
34563 
34564   gimple_set_bb (convert_stmt, bb2);
34565   gimple_set_bb (return_stmt, bb2);
34566 
34567   bb3 = e23->dest;
34568   make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34569 
34570   remove_edge (e23);
34571   make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34572 
34573   pop_cfun ();
34574 
34575   return bb3;
34576 }
34577 
34578 /* This parses the attribute arguments to target in DECL and determines
34579    the right builtin to use to match the platform specification.
34580    It returns the priority value for this version decl.  If PREDICATE_LIST
34581    is not NULL, it stores the list of cpu features that need to be checked
34582    before dispatching this function.  */
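
/* For example (illustrative only), a version declared as

     __attribute__ ((target ("arch=core2")))
     int foo (void);

   yields priority P_PROC_SSSE3 and a predicate list with a
   __builtin_cpu_is ("core2") check, while target ("sse4.2") yields
   priority P_SSE4_2 and a __builtin_cpu_supports ("sse4.2") check.  */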
34583 
34584 static unsigned int
34585 get_builtin_code_for_version (tree decl, tree *predicate_list)
34586 {
34587   tree attrs;
34588   struct cl_target_option cur_target;
34589   tree target_node;
34590   struct cl_target_option *new_target;
34591   const char *arg_str = NULL;
34592   const char *attrs_str = NULL;
34593   char *tok_str = NULL;
34594   char *token;
34595 
  /* Priority of i386 features; a greater value means higher priority.  This is
34597      used to decide the order in which function dispatch must happen.  For
34598      instance, a version specialized for SSE4.2 should be checked for dispatch
34599      before a version for SSE3, as SSE4.2 implies SSE3.  */
34600   enum feature_priority
34601   {
34602     P_ZERO = 0,
34603     P_MMX,
34604     P_SSE,
34605     P_SSE2,
34606     P_SSE3,
34607     P_SSSE3,
34608     P_PROC_SSSE3,
34609     P_SSE4_A,
34610     P_PROC_SSE4_A,
34611     P_SSE4_1,
34612     P_SSE4_2,
34613     P_PROC_SSE4_2,
34614     P_POPCNT,
34615     P_AVX,
34616     P_PROC_AVX,
34617     P_BMI,
34618     P_PROC_BMI,
34619     P_FMA4,
34620     P_XOP,
34621     P_PROC_XOP,
34622     P_FMA,
34623     P_PROC_FMA,
34624     P_BMI2,
34625     P_AVX2,
34626     P_PROC_AVX2,
34627     P_AVX512F,
34628     P_PROC_AVX512F
34629   };
34630 
  enum feature_priority priority = P_ZERO;
34632 
34633   /* These are the target attribute strings for which a dispatcher is
34634      available, from fold_builtin_cpu.  */
34635 
34636   static struct _feature_list
34637     {
34638       const char *const name;
34639       const enum feature_priority priority;
34640     }
34641   const feature_list[] =
34642     {
34643       {"mmx", P_MMX},
34644       {"sse", P_SSE},
34645       {"sse2", P_SSE2},
34646       {"sse3", P_SSE3},
34647       {"sse4a", P_SSE4_A},
34648       {"ssse3", P_SSSE3},
34649       {"sse4.1", P_SSE4_1},
34650       {"sse4.2", P_SSE4_2},
34651       {"popcnt", P_POPCNT},
34652       {"avx", P_AVX},
34653       {"bmi", P_BMI},
34654       {"fma4", P_FMA4},
34655       {"xop", P_XOP},
34656       {"fma", P_FMA},
34657       {"bmi2", P_BMI2},
34658       {"avx2", P_AVX2},
34659       {"avx512f", P_AVX512F}
34660     };
34661 
34662 
34663   static unsigned int NUM_FEATURES
34664     = sizeof (feature_list) / sizeof (struct _feature_list);
34665 
34666   unsigned int i;
34667 
34668   tree predicate_chain = NULL_TREE;
34669   tree predicate_decl, predicate_arg;
34670 
34671   attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34672   gcc_assert (attrs != NULL);
34673 
34674   attrs = TREE_VALUE (TREE_VALUE (attrs));
34675 
34676   gcc_assert (TREE_CODE (attrs) == STRING_CST);
34677   attrs_str = TREE_STRING_POINTER (attrs);
34678 
34679   /* Return priority zero for default function.  */
34680   if (strcmp (attrs_str, "default") == 0)
34681     return 0;
34682 
34683   /* Handle arch= if specified.  For priority, set it to be 1 more than
34684      the best instruction set the processor can handle.  For instance, if
34685      there is a version for atom and a version for ssse3 (the highest ISA
34686      priority for atom), the atom version must be checked for dispatch
     before the ssse3 version.  */
34688   if (strstr (attrs_str, "arch=") != NULL)
34689     {
34690       cl_target_option_save (&cur_target, &global_options);
34691       target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34692 						      &global_options_set);
34693 
34694       gcc_assert (target_node);
34695       new_target = TREE_TARGET_OPTION (target_node);
34696       gcc_assert (new_target);
34697 
34698       if (new_target->arch_specified && new_target->arch > 0)
34699 	{
34700 	  switch (new_target->arch)
34701 	    {
34702 	    case PROCESSOR_CORE2:
34703 	      arg_str = "core2";
34704 	      priority = P_PROC_SSSE3;
34705 	      break;
34706 	    case PROCESSOR_NEHALEM:
34707 	      if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34708 		arg_str = "westmere";
34709 	      else
34710 		/* We translate "arch=corei7" and "arch=nehalem" to
34711 		   "corei7" so that it will be mapped to M_INTEL_COREI7
34712 		   as cpu type to cover all M_INTEL_COREI7_XXXs.  */
34713 		arg_str = "corei7";
34714 	      priority = P_PROC_SSE4_2;
34715 	      break;
34716 	    case PROCESSOR_SANDYBRIDGE:
34717 	      if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34718 		arg_str = "ivybridge";
34719 	      else
34720 		arg_str = "sandybridge";
34721 	      priority = P_PROC_AVX;
34722 	      break;
34723 	    case PROCESSOR_HASWELL:
34724 	      if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34725 		arg_str = "broadwell";
34726 	      else
34727 		arg_str = "haswell";
34728 	      priority = P_PROC_AVX2;
34729 	      break;
34730 	    case PROCESSOR_BONNELL:
34731 	      arg_str = "bonnell";
34732 	      priority = P_PROC_SSSE3;
34733 	      break;
34734 	    case PROCESSOR_KNL:
34735 	      arg_str = "knl";
34736 	      priority = P_PROC_AVX512F;
34737 	      break;
34738 	    case PROCESSOR_SILVERMONT:
34739 	      arg_str = "silvermont";
34740 	      priority = P_PROC_SSE4_2;
34741 	      break;
34742 	    case PROCESSOR_AMDFAM10:
34743 	      arg_str = "amdfam10h";
34744 	      priority = P_PROC_SSE4_A;
34745 	      break;
34746 	    case PROCESSOR_BTVER1:
34747 	      arg_str = "btver1";
34748 	      priority = P_PROC_SSE4_A;
34749 	      break;
34750 	    case PROCESSOR_BTVER2:
34751 	      arg_str = "btver2";
34752 	      priority = P_PROC_BMI;
34753 	      break;
34754 	    case PROCESSOR_BDVER1:
34755 	      arg_str = "bdver1";
34756 	      priority = P_PROC_XOP;
34757 	      break;
34758 	    case PROCESSOR_BDVER2:
34759 	      arg_str = "bdver2";
34760 	      priority = P_PROC_FMA;
34761 	      break;
34762 	    case PROCESSOR_BDVER3:
34763 	      arg_str = "bdver3";
34764 	      priority = P_PROC_FMA;
34765 	      break;
34766 	    case PROCESSOR_BDVER4:
34767 	      arg_str = "bdver4";
34768 	      priority = P_PROC_AVX2;
34769 	      break;
34770 	    }
34771 	}
34772 
34773       cl_target_option_restore (&global_options, &cur_target);
34774 
34775       if (predicate_list && arg_str == NULL)
34776 	{
34777 	  error_at (DECL_SOURCE_LOCATION (decl),
		    "No dispatcher found for the versioning attributes");
34779 	  return 0;
34780 	}
34781 
34782       if (predicate_list)
34783 	{
34784           predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
          /* For a C string literal the length includes the trailing NUL.  */
34786           predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34787           predicate_chain = tree_cons (predicate_decl, predicate_arg,
34788 				       predicate_chain);
34789 	}
34790     }
34791 
34792   /* Process feature name.  */
  tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34794   strcpy (tok_str, attrs_str);
34795   token = strtok (tok_str, ",");
34796   predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34797 
34798   while (token != NULL)
34799     {
      /* Do not process "arch=".  */
34801       if (strncmp (token, "arch=", 5) == 0)
34802 	{
34803 	  token = strtok (NULL, ",");
34804 	  continue;
34805 	}
34806       for (i = 0; i < NUM_FEATURES; ++i)
34807 	{
34808 	  if (strcmp (token, feature_list[i].name) == 0)
34809 	    {
34810 	      if (predicate_list)
34811 		{
34812 		  predicate_arg = build_string_literal (
34813 				  strlen (feature_list[i].name) + 1,
34814 				  feature_list[i].name);
34815 		  predicate_chain = tree_cons (predicate_decl, predicate_arg,
34816 					       predicate_chain);
34817 		}
34818 	      /* Find the maximum priority feature.  */
34819 	      if (feature_list[i].priority > priority)
34820 		priority = feature_list[i].priority;
34821 
34822 	      break;
34823 	    }
34824 	}
34825       if (predicate_list && i == NUM_FEATURES)
34826 	{
34827 	  error_at (DECL_SOURCE_LOCATION (decl),
34828 		    "No dispatcher found for %s", token);
34829 	  return 0;
34830 	}
34831       token = strtok (NULL, ",");
34832     }
34833   free (tok_str);
34834 
34835   if (predicate_list && predicate_chain == NULL_TREE)
34836     {
      error_at (DECL_SOURCE_LOCATION (decl),
		"No dispatcher found for the versioning attributes: %s",
		attrs_str);
34840       return 0;
34841     }
34842   else if (predicate_list)
34843     {
34844       predicate_chain = nreverse (predicate_chain);
34845       *predicate_list = predicate_chain;
34846     }
34847 
34848   return priority;
34849 }
34850 
/* This compares the priority of target features in function DECL1
   and DECL2.  It returns a positive value if DECL1 has higher priority,
   a negative value if DECL2 has higher priority, and 0 if they are the
   same.  */
34855 
34856 static int
34857 ix86_compare_version_priority (tree decl1, tree decl2)
34858 {
34859   unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34860   unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34861 
34862   return (int)priority1 - (int)priority2;
34863 }
34864 
34865 /* V1 and V2 point to function versions with different priorities
34866    based on the target ISA.  This function compares their priorities.  */
34867 
34868 static int
34869 feature_compare (const void *v1, const void *v2)
34870 {
34871   typedef struct _function_version_info
34872     {
34873       tree version_decl;
34874       tree predicate_chain;
34875       unsigned int dispatch_priority;
34876     } function_version_info;
34877 
34878   const function_version_info c1 = *(const function_version_info *)v1;
34879   const function_version_info c2 = *(const function_version_info *)v2;
34880   return (c2.dispatch_priority - c1.dispatch_priority);
34881 }
34882 
34883 /* This function generates the dispatch function for
34884    multi-versioned functions.  DISPATCH_DECL is the function which will
34885    contain the dispatch logic.  FNDECLS are the function choices for
34886    dispatch, and is a tree chain.  EMPTY_BB is the basic block pointer
34887    in DISPATCH_DECL in which the dispatch code is generated.  */
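
/* Conceptually (illustrative only), the resolver body assembled here
   and by the add_condition_to_bb calls below looks like:

     void *resolver (void)
     {
       __builtin_cpu_init ();
       if (<predicates of the highest priority version hold>)
         return (void *) &version1;
       if (<predicates of the next version hold>)
         return (void *) &version2;
       return (void *) &default_version;
     }  */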
34888 
34889 static int
34890 dispatch_function_versions (tree dispatch_decl,
34891 			    void *fndecls_p,
34892 			    basic_block *empty_bb)
34893 {
34894   tree default_decl;
34895   gimple ifunc_cpu_init_stmt;
34896   gimple_seq gseq;
34897   int ix;
34898   tree ele;
34899   vec<tree> *fndecls;
34900   unsigned int num_versions = 0;
34901   unsigned int actual_versions = 0;
34902   unsigned int i;
34903 
34904   struct _function_version_info
34905     {
34906       tree version_decl;
34907       tree predicate_chain;
34908       unsigned int dispatch_priority;
    } *function_version_info;
34910 
34911   gcc_assert (dispatch_decl != NULL
34912 	      && fndecls_p != NULL
34913 	      && empty_bb != NULL);
34914 
  /* fndecls_p is actually a vector.  */
34916   fndecls = static_cast<vec<tree> *> (fndecls_p);
34917 
34918   /* At least one more version other than the default.  */
34919   num_versions = fndecls->length ();
34920   gcc_assert (num_versions >= 2);
34921 
34922   function_version_info = (struct _function_version_info *)
34923     XNEWVEC (struct _function_version_info, (num_versions - 1));
34924 
34925   /* The first version in the vector is the default decl.  */
34926   default_decl = (*fndecls)[0];
34927 
34928   push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34929 
34930   gseq = bb_seq (*empty_bb);
34931   /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
     constructors, so explicitly call __builtin_cpu_init here.  */
34933   ifunc_cpu_init_stmt = gimple_build_call_vec (
34934                      ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34935   gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34936   gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34937   set_bb_seq (*empty_bb, gseq);
34938 
34939   pop_cfun ();
34940 
34941 
34942   for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34943     {
34944       tree version_decl = ele;
34945       tree predicate_chain = NULL_TREE;
34946       unsigned int priority;
      /* Get the attribute string, parse it and find the right predicate decl.
34948          The predicate function could be a lengthy combination of many
34949 	 features, like arch-type and various isa-variants.  */
34950       priority = get_builtin_code_for_version (version_decl,
                                               &predicate_chain);
34952 
34953       if (predicate_chain == NULL_TREE)
34954 	continue;
34955 
34956       function_version_info [actual_versions].version_decl = version_decl;
34957       function_version_info [actual_versions].predicate_chain
34958 	 = predicate_chain;
34959       function_version_info [actual_versions].dispatch_priority = priority;
34960       actual_versions++;
34961     }
34962 
  /* Sort the versions in descending order of dispatch priority.  The
     priority is based on the ISA.  This is not a perfect solution; there
     could still be ambiguity: if more than one function version is suitable
     to execute, which one should be dispatched?  In the future, allow the
     user to specify a dispatch priority next to the version.  */
34968   qsort (function_version_info, actual_versions,
34969          sizeof (struct _function_version_info), feature_compare);
34970 
  for (i = 0; i < actual_versions; ++i)
34972     *empty_bb = add_condition_to_bb (dispatch_decl,
34973 				     function_version_info[i].version_decl,
34974 				     function_version_info[i].predicate_chain,
34975 				     *empty_bb);
34976 
  /* Dispatch the default version at the end.  */
34978   *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34979 				   NULL, *empty_bb);
34980 
34981   free (function_version_info);
34982   return 0;
34983 }
34984 
/* Comparator function to be used in the qsort routine to sort the
   argument strings of the "target" attribute.  */
34987 
34988 static int
34989 attr_strcmp (const void *v1, const void *v2)
34990 {
34991   const char *c1 = *(char *const*)v1;
34992   const char *c2 = *(char *const*)v2;
34993   return strcmp (c1, c2);
34994 }
34995 
/* ARGLIST is the argument list of the target attribute.  This function
   tokenizes the comma-separated arguments, sorts them, and returns a
   string which is a unique identifier for those arguments.  It also
   replaces the non-identifier characters '=', ',' and '-' with '_'.  */
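
/* For instance (illustrative), the argument list of
   target ("sse4.2,arch=core2") becomes "arch_core2_sse4.2":
   '=' and '-' are rewritten to '_', the tokens are sorted with
   strcmp, and they are rejoined with '_'.  */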
35000 
35001 static char *
35002 sorted_attr_string (tree arglist)
35003 {
35004   tree arg;
35005   size_t str_len_sum = 0;
35006   char **args = NULL;
35007   char *attr_str, *ret_str;
35008   char *attr = NULL;
35009   unsigned int argnum = 1;
35010   unsigned int i;
35011 
35012   for (arg = arglist; arg; arg = TREE_CHAIN (arg))
35013     {
35014       const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
35015       size_t len = strlen (str);
35016       str_len_sum += len + 1;
35017       if (arg != arglist)
35018 	argnum++;
35019       for (i = 0; i < strlen (str); i++)
35020 	if (str[i] == ',')
35021 	  argnum++;
35022     }
35023 
35024   attr_str = XNEWVEC (char, str_len_sum);
35025   str_len_sum = 0;
35026   for (arg = arglist; arg; arg = TREE_CHAIN (arg))
35027     {
35028       const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
35029       size_t len = strlen (str);
35030       memcpy (attr_str + str_len_sum, str, len);
35031       attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
35032       str_len_sum += len + 1;
35033     }
35034 
  /* Replace the '=' and '-' characters with '_'.  Any ',' separators
     are replaced with '_' when the sorted tokens are rejoined below.  */
35036   for (i = 0; i < strlen (attr_str); i++)
35037     if (attr_str[i] == '=' || attr_str[i]== '-')
35038       attr_str[i] = '_';
35039 
35040   if (argnum == 1)
35041     return attr_str;
35042 
35043   args = XNEWVEC (char *, argnum);
35044 
35045   i = 0;
35046   attr = strtok (attr_str, ",");
35047   while (attr != NULL)
35048     {
35049       args[i] = attr;
35050       i++;
35051       attr = strtok (NULL, ",");
35052     }
35053 
35054   qsort (args, argnum, sizeof (char *), attr_strcmp);
35055 
35056   ret_str = XNEWVEC (char, str_len_sum);
35057   str_len_sum = 0;
35058   for (i = 0; i < argnum; i++)
35059     {
35060       size_t len = strlen (args[i]);
35061       memcpy (ret_str + str_len_sum, args[i], len);
35062       ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
35063       str_len_sum += len + 1;
35064     }
35065 
35066   XDELETEVEC (args);
35067   XDELETEVEC (attr_str);
35068   return ret_str;
35069 }
35070 
35071 /* This function changes the assembler name for functions that are
35072    versions.  If DECL is a function version and has a "target"
35073    attribute, it appends the attribute string to its assembler name.  */
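
/* E.g. (illustrative) a version of foo declared with
   target ("avx") is assembled as "foo.avx", and one with
   target ("arch=core2") as "foo.arch_core2"; for C++ the mangled
   name is used in place of "foo".  */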
35074 
35075 static tree
35076 ix86_mangle_function_version_assembler_name (tree decl, tree id)
35077 {
35078   tree version_attr;
35079   const char *orig_name, *version_string;
35080   char *attr_str, *assembler_name;
35081 
35082   if (DECL_DECLARED_INLINE_P (decl)
35083       && lookup_attribute ("gnu_inline",
35084 			   DECL_ATTRIBUTES (decl)))
35085     error_at (DECL_SOURCE_LOCATION (decl),
35086 	      "Function versions cannot be marked as gnu_inline,"
35087 	      " bodies have to be generated");
35088 
35089   if (DECL_VIRTUAL_P (decl)
35090       || DECL_VINDEX (decl))
35091     sorry ("Virtual function multiversioning not supported");
35092 
35093   version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35094 
35095   /* target attribute string cannot be NULL.  */
35096   gcc_assert (version_attr != NULL_TREE);
35097 
35098   orig_name = IDENTIFIER_POINTER (id);
35099   version_string
35100     = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
35101 
35102   if (strcmp (version_string, "default") == 0)
35103     return id;
35104 
35105   attr_str = sorted_attr_string (TREE_VALUE (version_attr));
35106   assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
35107 
35108   sprintf (assembler_name, "%s.%s", orig_name, attr_str);
35109 
35110   /* Allow assembler name to be modified if already set.  */
35111   if (DECL_ASSEMBLER_NAME_SET_P (decl))
35112     SET_DECL_RTL (decl, NULL);
35113 
35114   tree ret = get_identifier (assembler_name);
35115   XDELETEVEC (attr_str);
35116   XDELETEVEC (assembler_name);
35117   return ret;
35118 }
35119 
35120 /* This function returns true if FN1 and FN2 are versions of the same function,
35121    that is, the target strings of the function decls are different.  This assumes
35122    that FN1 and FN2 have the same signature.  */
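
/* E.g. (illustrative, C++ function multiversioning):

     __attribute__ ((target ("default"))) int foo ();
     __attribute__ ((target ("avx"))) int foo ();

   are versions of each other, since their sorted target strings
   ("default" vs. "avx") differ.  */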
35123 
35124 static bool
35125 ix86_function_versions (tree fn1, tree fn2)
35126 {
35127   tree attr1, attr2;
35128   char *target1, *target2;
35129   bool result;
35130 
35131   if (TREE_CODE (fn1) != FUNCTION_DECL
35132       || TREE_CODE (fn2) != FUNCTION_DECL)
35133     return false;
35134 
35135   attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
35136   attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
35137 
35138   /* At least one function decl should have the target attribute specified.  */
35139   if (attr1 == NULL_TREE && attr2 == NULL_TREE)
35140     return false;
35141 
35142   /* Diagnose missing target attribute if one of the decls is already
35143      multi-versioned.  */
35144   if (attr1 == NULL_TREE || attr2 == NULL_TREE)
35145     {
35146       if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
35147 	{
35148 	  if (attr2 != NULL_TREE)
35149 	    {
35150 	      tree tem = fn1;
35151 	      fn1 = fn2;
35152 	      fn2 = tem;
35153 	      attr1 = attr2;
35154 	    }
35155 	  error_at (DECL_SOURCE_LOCATION (fn2),
35156 		    "missing %<target%> attribute for multi-versioned %D",
35157 		    fn2);
35158 	  inform (DECL_SOURCE_LOCATION (fn1),
35159 		  "previous declaration of %D", fn1);
35160 	  /* Prevent diagnosing of the same error multiple times.  */
35161 	  DECL_ATTRIBUTES (fn2)
35162 	    = tree_cons (get_identifier ("target"),
35163 			 copy_node (TREE_VALUE (attr1)),
35164 			 DECL_ATTRIBUTES (fn2));
35165 	}
35166       return false;
35167     }
35168 
35169   target1 = sorted_attr_string (TREE_VALUE (attr1));
35170   target2 = sorted_attr_string (TREE_VALUE (attr2));
35171 
35172   /* The sorted target strings must be different for fn1 and fn2
35173      to be versions.  */
35174   if (strcmp (target1, target2) == 0)
35175     result = false;
35176   else
35177     result = true;
35178 
35179   XDELETEVEC (target1);
35180   XDELETEVEC (target2);
35181 
35182   return result;
35183 }
35184 
35185 static tree
35186 ix86_mangle_decl_assembler_name (tree decl, tree id)
35187 {
35188   /* For function version, add the target suffix to the assembler name.  */
35189   if (TREE_CODE (decl) == FUNCTION_DECL
35190       && DECL_FUNCTION_VERSIONED (decl))
35191     id = ix86_mangle_function_version_assembler_name (decl, id);
35192 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
35193   id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
35194 #endif
35195 
35196   return id;
35197 }
35198 
/* Return a new name by appending SUFFIX to the DECL name.  If MAKE_UNIQUE
   is true, append the full path name of the source file.  */
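
/* E.g. (illustrative) for DECL foo and SUFFIX "resolver" this
   produces "foo.resolver", or "foo.<unique file id>.resolver" when
   MAKE_UNIQUE is true.  */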
35201 
35202 static char *
35203 make_name (tree decl, const char *suffix, bool make_unique)
35204 {
35205   char *global_var_name;
35206   int name_len;
35207   const char *name;
35208   const char *unique_name = NULL;
35209 
35210   name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
35211 
  /* Get a unique name that can be used globally without any chance
     of collision at link time.  */
35214   if (make_unique)
35215     unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
35216 
35217   name_len = strlen (name) + strlen (suffix) + 2;
35218 
35219   if (make_unique)
35220     name_len += strlen (unique_name) + 1;
35221   global_var_name = XNEWVEC (char, name_len);
35222 
35223   /* Use '.' to concatenate names as it is demangler friendly.  */
35224   if (make_unique)
35225     snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
35226 	      suffix);
35227   else
35228     snprintf (global_var_name, name_len, "%s.%s", name, suffix);
35229 
35230   return global_var_name;
35231 }
35232 
35233 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35234 
35235 /* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to the function DECL will be replaced with calls to the dispatcher
35237    by the front-end.  Return the decl created.  */
35238 
35239 static tree
35240 make_dispatcher_decl (const tree decl)
35241 {
35242   tree func_decl;
35243   char *func_name;
35244   tree fn_type, func_type;
35245   bool is_uniq = false;
35246 
35247   if (TREE_PUBLIC (decl) == 0)
35248     is_uniq = true;
35249 
35250   func_name = make_name (decl, "ifunc", is_uniq);
35251 
35252   fn_type = TREE_TYPE (decl);
35253   func_type = build_function_type (TREE_TYPE (fn_type),
35254 				   TYPE_ARG_TYPES (fn_type));
35255 
35256   func_decl = build_fn_decl (func_name, func_type);
35257   XDELETEVEC (func_name);
35258   TREE_USED (func_decl) = 1;
35259   DECL_CONTEXT (func_decl) = NULL_TREE;
35260   DECL_INITIAL (func_decl) = error_mark_node;
35261   DECL_ARTIFICIAL (func_decl) = 1;
35262   /* Mark this func as external, the resolver will flip it again if
35263      it gets generated.  */
35264   DECL_EXTERNAL (func_decl) = 1;
  /* IFUNCs have to be externally visible.  */
35266   TREE_PUBLIC (func_decl) = 1;
35267 
35268   return func_decl;
35269 }
35270 
35271 #endif
35272 
/* Return true if DECL is multi-versioned and is the default function,
   that is, it is not tagged with a target-specific optimization.  */
35275 
35276 static bool
35277 is_function_default_version (const tree decl)
35278 {
35279   if (TREE_CODE (decl) != FUNCTION_DECL
35280       || !DECL_FUNCTION_VERSIONED (decl))
35281     return false;
35282   tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35283   gcc_assert (attr);
35284   attr = TREE_VALUE (TREE_VALUE (attr));
35285   return (TREE_CODE (attr) == STRING_CST
35286 	  && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
35287 }
35288 
35289 /* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to the function DECL will be replaced with calls to the dispatcher
35291    by the front-end.  Returns the decl of the dispatcher function.  */
35292 
35293 static tree
35294 ix86_get_function_versions_dispatcher (void *decl)
35295 {
35296   tree fn = (tree) decl;
35297   struct cgraph_node *node = NULL;
35298   struct cgraph_node *default_node = NULL;
35299   struct cgraph_function_version_info *node_v = NULL;
35300   struct cgraph_function_version_info *first_v = NULL;
35301 
35302   tree dispatch_decl = NULL;
35303 
35304   struct cgraph_function_version_info *default_version_info = NULL;
35305 
35306   gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
35307 
35308   node = cgraph_node::get (fn);
35309   gcc_assert (node != NULL);
35310 
35311   node_v = node->function_version ();
35312   gcc_assert (node_v != NULL);
35313 
35314   if (node_v->dispatcher_resolver != NULL)
35315     return node_v->dispatcher_resolver;
35316 
35317   /* Find the default version and make it the first node.  */
35318   first_v = node_v;
35319   /* Go to the beginning of the chain.  */
35320   while (first_v->prev != NULL)
35321     first_v = first_v->prev;
35322   default_version_info = first_v;
35323   while (default_version_info != NULL)
35324     {
35325       if (is_function_default_version
35326 	    (default_version_info->this_node->decl))
35327         break;
35328       default_version_info = default_version_info->next;
35329     }
35330 
35331   /* If there is no default node, just return NULL.  */
35332   if (default_version_info == NULL)
35333     return NULL;
35334 
35335   /* Make default info the first node.  */
35336   if (first_v != default_version_info)
35337     {
35338       default_version_info->prev->next = default_version_info->next;
35339       if (default_version_info->next)
35340         default_version_info->next->prev = default_version_info->prev;
35341       first_v->prev = default_version_info;
35342       default_version_info->next = first_v;
35343       default_version_info->prev = NULL;
35344     }
35345 
35346   default_node = default_version_info->this_node;
35347 
35348 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35349   if (targetm.has_ifunc_p ())
35350     {
35351       struct cgraph_function_version_info *it_v = NULL;
35352       struct cgraph_node *dispatcher_node = NULL;
35353       struct cgraph_function_version_info *dispatcher_version_info = NULL;
35354 
35355       /* Right now, the dispatching is done via ifunc.  */
35356       dispatch_decl = make_dispatcher_decl (default_node->decl);
35357 
35358       dispatcher_node = cgraph_node::get_create (dispatch_decl);
35359       gcc_assert (dispatcher_node != NULL);
35360       dispatcher_node->dispatcher_function = 1;
35361       dispatcher_version_info
35362 	= dispatcher_node->insert_new_function_version ();
35363       dispatcher_version_info->next = default_version_info;
35364       dispatcher_node->definition = 1;
35365 
35366       /* Set the dispatcher for all the versions.  */
35367       it_v = default_version_info;
35368       while (it_v != NULL)
35369 	{
35370 	  it_v->dispatcher_resolver = dispatch_decl;
35371 	  it_v = it_v->next;
35372 	}
35373     }
35374   else
35375 #endif
35376     {
35377       error_at (DECL_SOURCE_LOCATION (default_node->decl),
35378 		"multiversioning needs ifunc which is not supported "
35379 		"on this target");
35380     }
35381 
35382   return dispatch_decl;
35383 }
35384 
35385 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
35386    it to CHAIN.  */
35387 
35388 static tree
35389 make_attribute (const char *name, const char *arg_name, tree chain)
35390 {
35391   tree attr_name;
35392   tree attr_arg_name;
35393   tree attr_args;
35394   tree attr;
35395 
35396   attr_name = get_identifier (name);
35397   attr_arg_name = build_string (strlen (arg_name), arg_name);
35398   attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35399   attr = tree_cons (attr_name, attr_args, chain);
35400   return attr;
35401 }
35402 
35403 /* Make the resolver function decl to dispatch the versions of
   a multi-versioned function, DEFAULT_DECL.  Create an
35405    empty basic block in the resolver and store the pointer in
35406    EMPTY_BB.  Return the decl of the resolver function.  */
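
/* In effect (illustrative sketch, not compilable C), for a public
   versioned function foo this creates

     void *foo.resolver (void) { ... body generated later ... }
     int foo.ifunc (void) __attribute__ ((ifunc ("foo.resolver")));

   where foo.ifunc is the DISPATCH_DECL made by make_dispatcher_decl.  */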
35407 
35408 static tree
35409 make_resolver_func (const tree default_decl,
35410 		    const tree dispatch_decl,
35411 		    basic_block *empty_bb)
35412 {
35413   char *resolver_name;
35414   tree decl, type, decl_name, t;
35415   bool is_uniq = false;
35416 
  /* IFUNCs have to be globally visible.  So, if the default_decl is
35418      not, then the name of the IFUNC should be made unique.  */
35419   if (TREE_PUBLIC (default_decl) == 0)
35420     is_uniq = true;
35421 
35422   /* Append the filename to the resolver function if the versions are
35423      not externally visible.  This is because the resolver function has
35424      to be externally visible for the loader to find it.  So, appending
35425      the filename will prevent conflicts with a resolver function from
35426      another module which is based on the same version name.  */
35427   resolver_name = make_name (default_decl, "resolver", is_uniq);
35428 
35429   /* The resolver function should return a (void *). */
35430   type = build_function_type_list (ptr_type_node, NULL_TREE);
35431 
35432   decl = build_fn_decl (resolver_name, type);
35433   decl_name = get_identifier (resolver_name);
35434   SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35435 
35436   DECL_NAME (decl) = decl_name;
35437   TREE_USED (decl) = 1;
35438   DECL_ARTIFICIAL (decl) = 1;
35439   DECL_IGNORED_P (decl) = 0;
35440   /* IFUNC resolvers have to be externally visible.  */
35441   TREE_PUBLIC (decl) = 1;
35442   DECL_UNINLINABLE (decl) = 1;
35443 
35444   /* Resolver is not external, body is generated.  */
35445   DECL_EXTERNAL (decl) = 0;
35446   DECL_EXTERNAL (dispatch_decl) = 0;
35447 
35448   DECL_CONTEXT (decl) = NULL_TREE;
35449   DECL_INITIAL (decl) = make_node (BLOCK);
35450   DECL_STATIC_CONSTRUCTOR (decl) = 0;
35451 
35452   if (DECL_COMDAT_GROUP (default_decl)
35453       || TREE_PUBLIC (default_decl))
35454     {
35455       /* In this case, each translation unit with a call to this
35456 	 versioned function will put out a resolver.  Ensure it
35457 	 is comdat to keep just one copy.  */
35458       DECL_COMDAT (decl) = 1;
35459       make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35460     }
  /* Build the result decl and add it to the function decl.  */
35462   t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35463   DECL_ARTIFICIAL (t) = 1;
35464   DECL_IGNORED_P (t) = 1;
35465   DECL_RESULT (decl) = t;
35466 
35467   gimplify_function_tree (decl);
35468   push_cfun (DECL_STRUCT_FUNCTION (decl));
35469   *empty_bb = init_lowered_empty_function (decl, false, 0);
35470 
35471   cgraph_node::add_new_function (decl, true);
35472   symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35473 
35474   pop_cfun ();
35475 
35476   gcc_assert (dispatch_decl != NULL);
35477   /* Mark dispatch_decl as "ifunc" with resolver as resolver_name.  */
35478   DECL_ATTRIBUTES (dispatch_decl)
35479     = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35480 
35481   /* Create the alias for dispatch to resolver here.  */
35482   /*cgraph_create_function_alias (dispatch_decl, decl);*/
35483   cgraph_node::create_same_body_alias (dispatch_decl, decl);
35484   XDELETEVEC (resolver_name);
35485   return decl;
35486 }
35487 
/* Generate the dispatching code body to dispatch a multi-versioned function.
   This target hook is called to process the "target" attributes and
   provide the code to dispatch the right function at run-time.  NODE_P
   points to the dispatcher decl whose body will be created.  */
35492 
35493 static tree
35494 ix86_generate_version_dispatcher_body (void *node_p)
35495 {
35496   tree resolver_decl;
35497   basic_block empty_bb;
35498   tree default_ver_decl;
35499   struct cgraph_node *versn;
35500   struct cgraph_node *node;
35501 
35502   struct cgraph_function_version_info *node_version_info = NULL;
35503   struct cgraph_function_version_info *versn_info = NULL;
35504 
35505   node = (cgraph_node *)node_p;
35506 
35507   node_version_info = node->function_version ();
35508   gcc_assert (node->dispatcher_function
35509 	      && node_version_info != NULL);
35510 
35511   if (node_version_info->dispatcher_resolver)
35512     return node_version_info->dispatcher_resolver;
35513 
35514   /* The first version in the chain corresponds to the default version.  */
35515   default_ver_decl = node_version_info->next->this_node->decl;
35516 
35517   /* node is going to be an alias, so remove the finalized bit.  */
35518   node->definition = false;
35519 
35520   resolver_decl = make_resolver_func (default_ver_decl,
35521 				      node->decl, &empty_bb);
35522 
35523   node_version_info->dispatcher_resolver = resolver_decl;
35524 
35525   push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
35526 
35527   auto_vec<tree, 2> fn_ver_vec;
35528 
35529   for (versn_info = node_version_info->next; versn_info;
35530        versn_info = versn_info->next)
35531     {
35532       versn = versn_info->this_node;
35533       /* Check for virtual functions here again, as by this time it should
35534 	 have been determined if this function needs a vtable index or
35535 	 not.  This happens for methods in derived classes that override
35536 	 virtual methods in base classes but are not explicitly marked as
35537 	 virtual.  */
35538       if (DECL_VINDEX (versn->decl))
35539 	sorry ("Virtual function multiversioning not supported");
35540 
35541       fn_ver_vec.safe_push (versn->decl);
35542     }
35543 
35544   dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35545   cgraph_edge::rebuild_edges ();
35546   pop_cfun ();
35547   return resolver_decl;
35548 }

/* This builds the processor_model struct type defined in
   libgcc/config/i386/cpuinfo.c.  */
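
/* The layout built here mirrors the struct in cpuinfo.c (for
   reference; not compiled from this comment):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */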
35551 
35552 static tree
35553 build_processor_model_struct (void)
35554 {
35555   const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35556 			      "__cpu_features"};
35557   tree field = NULL_TREE, field_chain = NULL_TREE;
35558   int i;
35559   tree type = make_node (RECORD_TYPE);
35560 
35561   /* The first 3 fields are unsigned int.  */
35562   for (i = 0; i < 3; ++i)
35563     {
35564       field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35565 			  get_identifier (field_name[i]), unsigned_type_node);
35566       if (field_chain != NULL_TREE)
35567 	DECL_CHAIN (field) = field_chain;
35568       field_chain = field;
35569     }
35570 
35571   /* The last field is an array of unsigned integers of size one.  */
35572   field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35573 		      get_identifier (field_name[3]),
35574 		      build_array_type (unsigned_type_node,
35575 					build_index_type (size_one_node)));
35576   if (field_chain != NULL_TREE)
35577     DECL_CHAIN (field) = field_chain;
35578   field_chain = field;
35579 
35580   finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35581   return type;
35582 }
35583 
/* Return an extern, comdat VAR_DECL of type TYPE and name NAME.  */
35585 
35586 static tree
35587 make_var_decl (tree type, const char *name)
35588 {
35589   tree new_decl;
35590 
  new_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
                         get_identifier (name), type);
35595 
35596   DECL_EXTERNAL (new_decl) = 1;
35597   TREE_STATIC (new_decl) = 1;
35598   TREE_PUBLIC (new_decl) = 1;
35599   DECL_INITIAL (new_decl) = 0;
35600   DECL_ARTIFICIAL (new_decl) = 0;
35601   DECL_PRESERVE_P (new_decl) = 1;
35602 
35603   make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35604   assemble_variable (new_decl, 0, 0, 0);
35605 
35606   return new_decl;
35607 }
35608 
/* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is
   folded into an integer defined in libgcc/config/i386/cpuinfo.c.  */
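
/* For example (illustrative), the folds produced below are roughly:

     __builtin_cpu_is ("intel")
       -> (int) (__cpu_model.__cpu_vendor == M_INTEL)
     __builtin_cpu_supports ("avx")
       -> (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX))  */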
35611 
35612 static tree
35613 fold_builtin_cpu (tree fndecl, tree *args)
35614 {
35615   unsigned int i;
35616   enum ix86_builtins fn_code = (enum ix86_builtins)
35617 				DECL_FUNCTION_CODE (fndecl);
35618   tree param_string_cst = NULL;
35619 
  /* This is the order of bit-fields in __processor_features in cpuinfo.c.  */
35621   enum processor_features
35622   {
35623     F_CMOV = 0,
35624     F_MMX,
35625     F_POPCNT,
35626     F_SSE,
35627     F_SSE2,
35628     F_SSE3,
35629     F_SSSE3,
35630     F_SSE4_1,
35631     F_SSE4_2,
35632     F_AVX,
35633     F_AVX2,
35634     F_SSE4_A,
35635     F_FMA4,
35636     F_XOP,
35637     F_FMA,
35638     F_AVX512F,
35639     F_BMI,
35640     F_BMI2,
35641     F_MAX
35642   };
35643 
  /* These are the values for vendor types, cpu types and subtypes
     in cpuinfo.c.  Cpu types and subtypes should have the corresponding
     start value subtracted from them.  */
35647   enum processor_model
35648   {
35649     M_INTEL = 1,
35650     M_AMD,
35651     M_CPU_TYPE_START,
35652     M_INTEL_BONNELL,
35653     M_INTEL_CORE2,
35654     M_INTEL_COREI7,
35655     M_AMDFAM10H,
35656     M_AMDFAM15H,
35657     M_INTEL_SILVERMONT,
35658     M_INTEL_KNL,
35659     M_AMD_BTVER1,
35660     M_AMD_BTVER2,
35661     M_CPU_SUBTYPE_START,
35662     M_INTEL_COREI7_NEHALEM,
35663     M_INTEL_COREI7_WESTMERE,
35664     M_INTEL_COREI7_SANDYBRIDGE,
35665     M_AMDFAM10H_BARCELONA,
35666     M_AMDFAM10H_SHANGHAI,
35667     M_AMDFAM10H_ISTANBUL,
35668     M_AMDFAM15H_BDVER1,
35669     M_AMDFAM15H_BDVER2,
35670     M_AMDFAM15H_BDVER3,
35671     M_AMDFAM15H_BDVER4,
35672     M_INTEL_COREI7_IVYBRIDGE,
35673     M_INTEL_COREI7_HASWELL,
35674     M_INTEL_COREI7_BROADWELL
35675   };
35676 
35677   static struct _arch_names_table
35678     {
35679       const char *const name;
35680       const enum processor_model model;
35681     }
35682   const arch_names_table[] =
35683     {
35684       {"amd", M_AMD},
35685       {"intel", M_INTEL},
35686       {"atom", M_INTEL_BONNELL},
35687       {"slm", M_INTEL_SILVERMONT},
35688       {"core2", M_INTEL_CORE2},
35689       {"corei7", M_INTEL_COREI7},
35690       {"nehalem", M_INTEL_COREI7_NEHALEM},
35691       {"westmere", M_INTEL_COREI7_WESTMERE},
35692       {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35693       {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35694       {"haswell", M_INTEL_COREI7_HASWELL},
35695       {"broadwell", M_INTEL_COREI7_BROADWELL},
35696       {"bonnell", M_INTEL_BONNELL},
35697       {"silvermont", M_INTEL_SILVERMONT},
35698       {"knl", M_INTEL_KNL},
35699       {"amdfam10h", M_AMDFAM10H},
35700       {"barcelona", M_AMDFAM10H_BARCELONA},
35701       {"shanghai", M_AMDFAM10H_SHANGHAI},
35702       {"istanbul", M_AMDFAM10H_ISTANBUL},
35703       {"btver1", M_AMD_BTVER1},
35704       {"amdfam15h", M_AMDFAM15H},
35705       {"bdver1", M_AMDFAM15H_BDVER1},
35706       {"bdver2", M_AMDFAM15H_BDVER2},
35707       {"bdver3", M_AMDFAM15H_BDVER3},
35708       {"bdver4", M_AMDFAM15H_BDVER4},
35709       {"btver2", M_AMD_BTVER2},
35710     };
35711 
35712   static struct _isa_names_table
35713     {
35714       const char *const name;
35715       const enum processor_features feature;
35716     }
35717   const isa_names_table[] =
35718     {
35719       {"cmov",   F_CMOV},
35720       {"mmx",    F_MMX},
35721       {"popcnt", F_POPCNT},
35722       {"sse",    F_SSE},
35723       {"sse2",   F_SSE2},
35724       {"sse3",   F_SSE3},
35725       {"ssse3",  F_SSSE3},
35726       {"sse4a",  F_SSE4_A},
35727       {"sse4.1", F_SSE4_1},
35728       {"sse4.2", F_SSE4_2},
35729       {"avx",    F_AVX},
35730       {"fma4",   F_FMA4},
35731       {"xop",    F_XOP},
35732       {"fma",    F_FMA},
35733       {"avx2",   F_AVX2},
35734       {"avx512f",F_AVX512F},
35735       {"bmi",    F_BMI},
35736       {"bmi2",   F_BMI2}
35737     };
35738 
35739   tree __processor_model_type = build_processor_model_struct ();
35740   tree __cpu_model_var = make_var_decl (__processor_model_type,
35741 					"__cpu_model");
35742 
35743 
35744   varpool_node::add (__cpu_model_var);
35745 
35746   gcc_assert ((args != NULL) && (*args != NULL));
35747 
35748   param_string_cst = *args;
35749   while (param_string_cst
35750 	 && TREE_CODE (param_string_cst) !=  STRING_CST)
35751     {
      /* *args must be an expr that can contain other EXPRs leading to a
	 STRING_CST.  */
35754       if (!EXPR_P (param_string_cst))
35755  	{
35756 	  error ("Parameter to builtin must be a string constant or literal");
35757 	  return integer_zero_node;
35758 	}
35759       param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35760     }
35761 
35762   gcc_assert (param_string_cst);
35763 
35764   if (fn_code == IX86_BUILTIN_CPU_IS)
35765     {
35766       tree ref;
35767       tree field;
35768       tree final;
35769 
35770       unsigned int field_val = 0;
35771       unsigned int NUM_ARCH_NAMES
35772 	= sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35773 
35774       for (i = 0; i < NUM_ARCH_NAMES; i++)
35775 	if (strcmp (arch_names_table[i].name,
35776 	    TREE_STRING_POINTER (param_string_cst)) == 0)
35777 	  break;
35778 
35779       if (i == NUM_ARCH_NAMES)
35780 	{
35781 	  error ("Parameter to builtin not valid: %s",
35782 	         TREE_STRING_POINTER (param_string_cst));
35783 	  return integer_zero_node;
35784 	}
35785 
35786       field = TYPE_FIELDS (__processor_model_type);
35787       field_val = arch_names_table[i].model;
35788 
35789       /* CPU types are stored in the next field.  */
35790       if (field_val > M_CPU_TYPE_START
35791 	  && field_val < M_CPU_SUBTYPE_START)
35792 	{
35793 	  field = DECL_CHAIN (field);
35794 	  field_val -= M_CPU_TYPE_START;
35795 	}
35796 
35797       /* CPU subtypes are stored in the next field.  */
35798       if (field_val > M_CPU_SUBTYPE_START)
35799 	{
35800 	  field = DECL_CHAIN ( DECL_CHAIN (field));
35801 	  field_val -= M_CPU_SUBTYPE_START;
35802 	}
35803 
35804       /* Get the appropriate field in __cpu_model.  */
      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
                    field, NULL_TREE);
35807 
35808       /* Check the value.  */
35809       final = build2 (EQ_EXPR, unsigned_type_node, ref,
35810 		      build_int_cstu (unsigned_type_node, field_val));
35811       return build1 (CONVERT_EXPR, integer_type_node, final);
35812     }
35813   else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35814     {
35815       tree ref;
35816       tree array_elt;
35817       tree field;
35818       tree final;
35819 
35820       unsigned int field_val = 0;
35821       unsigned int NUM_ISA_NAMES
35822 	= sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35823 
35824       for (i = 0; i < NUM_ISA_NAMES; i++)
35825 	if (strcmp (isa_names_table[i].name,
35826 	    TREE_STRING_POINTER (param_string_cst)) == 0)
35827 	  break;
35828 
35829       if (i == NUM_ISA_NAMES)
35830 	{
35831 	  error ("Parameter to builtin not valid: %s",
                 TREE_STRING_POINTER (param_string_cst));
35833 	  return integer_zero_node;
35834 	}
35835 
35836       field = TYPE_FIELDS (__processor_model_type);
35837       /* Get the last field, which is __cpu_features.  */
35838       while (DECL_CHAIN (field))
35839         field = DECL_CHAIN (field);
35840 
      /* Get the appropriate field: __cpu_model.__cpu_features.  */
      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
                    field, NULL_TREE);
35844 
35845       /* Access the 0th element of __cpu_features array.  */
35846       array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35847 			  integer_zero_node, NULL_TREE, NULL_TREE);
35848 
35849       field_val = (1 << isa_names_table[i].feature);
      /* Return __cpu_model.__cpu_features[0] & field_val.  */
35851       final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35852 		      build_int_cstu (unsigned_type_node, field_val));
35853       return build1 (CONVERT_EXPR, integer_type_node, final);
35854     }
35855   gcc_unreachable ();
35856 }
35857 
35858 static tree
35859 ix86_fold_builtin (tree fndecl, int n_args,
35860 		   tree *args, bool ignore ATTRIBUTE_UNUSED)
35861 {
35862   if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35863     {
35864       enum ix86_builtins fn_code = (enum ix86_builtins)
35865 				   DECL_FUNCTION_CODE (fndecl);
      if (fn_code == IX86_BUILTIN_CPU_IS
35867 	  || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35868 	{
35869 	  gcc_assert (n_args == 1);
35870           return fold_builtin_cpu (fndecl, args);
35871 	}
35872     }
35873 
35874 #ifdef SUBTARGET_FOLD_BUILTIN
35875   return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35876 #endif
35877 
35878   return NULL_TREE;
35879 }
35880 
/* Make builtins to detect cpu type and features supported.  NAME is
   the builtin name, CODE is the builtin code, FTYPE is the function
   type of the builtin, and IS_CONST says whether the builtin should
   be marked const (TREE_READONLY).  */
35884 
35885 static void
35886 make_cpu_type_builtin (const char* name, int code,
35887 		       enum ix86_builtin_func_type ftype, bool is_const)
35888 {
35889   tree decl;
35890   tree type;
35891 
35892   type = ix86_get_builtin_func_type (ftype);
35893   decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35894 			       NULL, NULL_TREE);
35895   gcc_assert (decl != NULL_TREE);
35896   ix86_builtins[(int) code] = decl;
35897   TREE_READONLY (decl) = is_const;
35898 }
35899 
/* Make builtins to get CPU type and features supported.  The created
   builtins are:

   __builtin_cpu_init (), to detect cpu type and features,
   __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
   __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>.  */
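
/* Typical use in user code (illustrative):

     __builtin_cpu_init ();
     if (__builtin_cpu_is ("corei7") && __builtin_cpu_supports ("avx2"))
       ... take the AVX2 path ...  */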
35907 
35908 static void
35909 ix86_init_platform_type_builtins (void)
35910 {
35911   make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35912 			 INT_FTYPE_VOID, false);
35913   make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35914 			 INT_FTYPE_PCCHAR, true);
35915   make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35916 			 INT_FTYPE_PCCHAR, true);
35917 }
35918 
35919 /* Internal method for ix86_init_builtins.  */
35920 
35921 static void
35922 ix86_init_builtins_va_builtins_abi (void)
35923 {
35924   tree ms_va_ref, sysv_va_ref;
35925   tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35926   tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35927   tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35928   tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35929 
35930   if (!TARGET_64BIT)
35931     return;
35932   fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35933   fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35934   ms_va_ref = build_reference_type (ms_va_list_type_node);
35935   sysv_va_ref =
35936     build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35937 
35938   fnvoid_va_end_ms =
35939     build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35940   fnvoid_va_start_ms =
35941     build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35942   fnvoid_va_end_sysv =
35943     build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35944   fnvoid_va_start_sysv =
35945     build_varargs_function_type_list (void_type_node, sysv_va_ref,
35946     				       NULL_TREE);
35947   fnvoid_va_copy_ms =
35948     build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35949     			      NULL_TREE);
35950   fnvoid_va_copy_sysv =
35951     build_function_type_list (void_type_node, sysv_va_ref,
35952     			      sysv_va_ref, NULL_TREE);
35953 
35954   add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35955   			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35956   add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35957   			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35958   add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35959 			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35960   add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35961   			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35962   add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35963   			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35964   add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35965 			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35966 }
35967 
35968 static void
35969 ix86_init_builtin_types (void)
35970 {
35971   tree float128_type_node, float80_type_node;
35972 
35973   /* The __float80 type.  */
35974   float80_type_node = long_double_type_node;
35975   if (TYPE_MODE (float80_type_node) != XFmode)
35976     {
      /* Build a distinct 80-bit type when long double is not XFmode.  */
35978       float80_type_node = make_node (REAL_TYPE);
35979 
35980       TYPE_PRECISION (float80_type_node) = 80;
35981       layout_type (float80_type_node);
35982     }
35983   lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35984 
35985   /* The __float128 type.  */
35986   float128_type_node = make_node (REAL_TYPE);
35987   TYPE_PRECISION (float128_type_node) = 128;
35988   layout_type (float128_type_node);
35989   lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35990 
35991   /* This macro is built by i386-builtin-types.awk.  */
35992   DEFINE_BUILTIN_PRIMITIVE_TYPES;
35993 }
35994 
35995 static void
35996 ix86_init_builtins (void)
35997 {
35998   tree t;
35999 
36000   ix86_init_builtin_types ();
36001 
  /* Builtins to get CPU type and features.  */
36003   ix86_init_platform_type_builtins ();
36004 
36005   /* TFmode support builtins.  */
36006   def_builtin_const (0, "__builtin_infq",
36007 		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
36008   def_builtin_const (0, "__builtin_huge_valq",
36009 		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
36010 
  /* We will expand them to a normal call if SSE isn't available since
     they are used by libgcc.  */
36013   t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
36014   t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
36015 			    BUILT_IN_MD, "__fabstf2", NULL_TREE);
36016   TREE_READONLY (t) = 1;
36017   ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
36018 
36019   t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
36020   t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
36021 			    BUILT_IN_MD, "__copysigntf3", NULL_TREE);
36022   TREE_READONLY (t) = 1;
36023   ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
36024 
36025   ix86_init_tm_builtins ();
36026   ix86_init_mmx_sse_builtins ();
36027   ix86_init_mpx_builtins ();
36028 
36029   if (TARGET_LP64)
36030     ix86_init_builtins_va_builtins_abi ();
36031 
36032 #ifdef SUBTARGET_INIT_BUILTINS
36033   SUBTARGET_INIT_BUILTINS;
36034 #endif
36035 }
36036 
36037 /* Return the ix86 builtin for CODE.  */
36038 
36039 static tree
36040 ix86_builtin_decl (unsigned code, bool)
36041 {
36042   if (code >= IX86_BUILTIN_MAX)
36043     return error_mark_node;
36044 
36045   return ix86_builtins[code];
36046 }
36047 
36048 /* Errors in the source file can cause expand_expr to return const0_rtx
36049    where we expect a vector.  To avoid crashing, use one of the vector
36050    clear instructions.  */
36051 static rtx
36052 safe_vector_operand (rtx x, machine_mode mode)
36053 {
36054   if (x == const0_rtx)
36055     x = CONST0_RTX (mode);
36056   return x;
36057 }
36058 
/* Fix up modeless constants to fit the required mode.  */
36060 static rtx
36061 fixup_modeless_constant (rtx x, machine_mode mode)
36062 {
36063   if (GET_MODE (x) == VOIDmode)
36064     x = convert_to_mode (mode, x, 1);
36065   return x;
36066 }
36067 
36068 /* Subroutine of ix86_expand_builtin to take care of binop insns.  */
36069 
36070 static rtx
36071 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
36072 {
36073   rtx pat;
36074   tree arg0 = CALL_EXPR_ARG (exp, 0);
36075   tree arg1 = CALL_EXPR_ARG (exp, 1);
36076   rtx op0 = expand_normal (arg0);
36077   rtx op1 = expand_normal (arg1);
36078   machine_mode tmode = insn_data[icode].operand[0].mode;
36079   machine_mode mode0 = insn_data[icode].operand[1].mode;
36080   machine_mode mode1 = insn_data[icode].operand[2].mode;
36081 
36082   if (VECTOR_MODE_P (mode0))
36083     op0 = safe_vector_operand (op0, mode0);
36084   if (VECTOR_MODE_P (mode1))
36085     op1 = safe_vector_operand (op1, mode1);
36086 
36087   if (optimize || !target
36088       || GET_MODE (target) != tmode
36089       || !insn_data[icode].operand[0].predicate (target, tmode))
36090     target = gen_reg_rtx (tmode);
36091 
36092   if (GET_MODE (op1) == SImode && mode1 == TImode)
36093     {
36094       rtx x = gen_reg_rtx (V4SImode);
36095       emit_insn (gen_sse2_loadd (x, op1));
36096       op1 = gen_lowpart (TImode, x);
36097     }
36098 
36099   if (!insn_data[icode].operand[1].predicate (op0, mode0))
36100     op0 = copy_to_mode_reg (mode0, op0);
36101   if (!insn_data[icode].operand[2].predicate (op1, mode1))
36102     op1 = copy_to_mode_reg (mode1, op1);
36103 
36104   pat = GEN_FCN (icode) (target, op0, op1);
36105   if (! pat)
36106     return 0;
36107 
36108   emit_insn (pat);
36109 
36110   return target;
36111 }
36112 
36113 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */
36114 
36115 static rtx
36116 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
36117 			       enum ix86_builtin_func_type m_type,
36118 			       enum rtx_code sub_code)
36119 {
36120   rtx pat;
36121   int i;
36122   int nargs;
36123   bool comparison_p = false;
36124   bool tf_p = false;
36125   bool last_arg_constant = false;
36126   int num_memory = 0;
36127   struct {
36128     rtx op;
36129     machine_mode mode;
36130   } args[4];
36131 
36132   machine_mode tmode = insn_data[icode].operand[0].mode;
36133 
36134   switch (m_type)
36135     {
36136     case MULTI_ARG_4_DF2_DI_I:
36137     case MULTI_ARG_4_DF2_DI_I1:
36138     case MULTI_ARG_4_SF2_SI_I:
36139     case MULTI_ARG_4_SF2_SI_I1:
36140       nargs = 4;
36141       last_arg_constant = true;
36142       break;
36143 
36144     case MULTI_ARG_3_SF:
36145     case MULTI_ARG_3_DF:
36146     case MULTI_ARG_3_SF2:
36147     case MULTI_ARG_3_DF2:
36148     case MULTI_ARG_3_DI:
36149     case MULTI_ARG_3_SI:
36150     case MULTI_ARG_3_SI_DI:
36151     case MULTI_ARG_3_HI:
36152     case MULTI_ARG_3_HI_SI:
36153     case MULTI_ARG_3_QI:
36154     case MULTI_ARG_3_DI2:
36155     case MULTI_ARG_3_SI2:
36156     case MULTI_ARG_3_HI2:
36157     case MULTI_ARG_3_QI2:
36158       nargs = 3;
36159       break;
36160 
36161     case MULTI_ARG_2_SF:
36162     case MULTI_ARG_2_DF:
36163     case MULTI_ARG_2_DI:
36164     case MULTI_ARG_2_SI:
36165     case MULTI_ARG_2_HI:
36166     case MULTI_ARG_2_QI:
36167       nargs = 2;
36168       break;
36169 
36170     case MULTI_ARG_2_DI_IMM:
36171     case MULTI_ARG_2_SI_IMM:
36172     case MULTI_ARG_2_HI_IMM:
36173     case MULTI_ARG_2_QI_IMM:
36174       nargs = 2;
36175       last_arg_constant = true;
36176       break;
36177 
36178     case MULTI_ARG_1_SF:
36179     case MULTI_ARG_1_DF:
36180     case MULTI_ARG_1_SF2:
36181     case MULTI_ARG_1_DF2:
36182     case MULTI_ARG_1_DI:
36183     case MULTI_ARG_1_SI:
36184     case MULTI_ARG_1_HI:
36185     case MULTI_ARG_1_QI:
36186     case MULTI_ARG_1_SI_DI:
36187     case MULTI_ARG_1_HI_DI:
36188     case MULTI_ARG_1_HI_SI:
36189     case MULTI_ARG_1_QI_DI:
36190     case MULTI_ARG_1_QI_SI:
36191     case MULTI_ARG_1_QI_HI:
36192       nargs = 1;
36193       break;
36194 
36195     case MULTI_ARG_2_DI_CMP:
36196     case MULTI_ARG_2_SI_CMP:
36197     case MULTI_ARG_2_HI_CMP:
36198     case MULTI_ARG_2_QI_CMP:
36199       nargs = 2;
36200       comparison_p = true;
36201       break;
36202 
36203     case MULTI_ARG_2_SF_TF:
36204     case MULTI_ARG_2_DF_TF:
36205     case MULTI_ARG_2_DI_TF:
36206     case MULTI_ARG_2_SI_TF:
36207     case MULTI_ARG_2_HI_TF:
36208     case MULTI_ARG_2_QI_TF:
36209       nargs = 2;
36210       tf_p = true;
36211       break;
36212 
36213     default:
36214       gcc_unreachable ();
36215     }
36216 
36217   if (optimize || !target
36218       || GET_MODE (target) != tmode
36219       || !insn_data[icode].operand[0].predicate (target, tmode))
36220     target = gen_reg_rtx (tmode);
36221 
36222   gcc_assert (nargs <= 4);
36223 
36224   for (i = 0; i < nargs; i++)
36225     {
36226       tree arg = CALL_EXPR_ARG (exp, i);
36227       rtx op = expand_normal (arg);
36228       int adjust = (comparison_p) ? 1 : 0;
36229       machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
36230 
36231       if (last_arg_constant && i == nargs - 1)
36232 	{
36233 	  if (!insn_data[icode].operand[i + 1].predicate (op, mode))
36234 	    {
36235 	      enum insn_code new_icode = icode;
36236 	      switch (icode)
36237 		{
36238 		case CODE_FOR_xop_vpermil2v2df3:
36239 		case CODE_FOR_xop_vpermil2v4sf3:
36240 		case CODE_FOR_xop_vpermil2v4df3:
36241 		case CODE_FOR_xop_vpermil2v8sf3:
36242 		  error ("the last argument must be a 2-bit immediate");
36243 		  return gen_reg_rtx (tmode);
36244 		case CODE_FOR_xop_rotlv2di3:
36245 		  new_icode = CODE_FOR_rotlv2di3;
36246 		  goto xop_rotl;
36247 		case CODE_FOR_xop_rotlv4si3:
36248 		  new_icode = CODE_FOR_rotlv4si3;
36249 		  goto xop_rotl;
36250 		case CODE_FOR_xop_rotlv8hi3:
36251 		  new_icode = CODE_FOR_rotlv8hi3;
36252 		  goto xop_rotl;
36253 		case CODE_FOR_xop_rotlv16qi3:
36254 		  new_icode = CODE_FOR_rotlv16qi3;
36255 		xop_rotl:
36256 		  if (CONST_INT_P (op))
36257 		    {
36258 		      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
36259 		      op = GEN_INT (INTVAL (op) & mask);
36260 		      gcc_checking_assert
36261 			(insn_data[icode].operand[i + 1].predicate (op, mode));
36262 		    }
36263 		  else
36264 		    {
36265 		      gcc_checking_assert
36266 			(nargs == 2
36267 			 && insn_data[new_icode].operand[0].mode == tmode
36268 			 && insn_data[new_icode].operand[1].mode == tmode
36269 			 && insn_data[new_icode].operand[2].mode == mode
36270 			 && insn_data[new_icode].operand[0].predicate
36271 			    == insn_data[icode].operand[0].predicate
36272 			 && insn_data[new_icode].operand[1].predicate
36273 			    == insn_data[icode].operand[1].predicate);
36274 		      icode = new_icode;
36275 		      goto non_constant;
36276 		    }
36277 		  break;
36278 		default:
36279 		  gcc_unreachable ();
36280 		}
36281 	    }
36282 	}
36283       else
36284 	{
36285 	non_constant:
36286 	  if (VECTOR_MODE_P (mode))
36287 	    op = safe_vector_operand (op, mode);
36288 
36289 	  /* If we aren't optimizing, only allow one memory operand to be
36290 	     generated.  */
36291 	  if (memory_operand (op, mode))
36292 	    num_memory++;
36293 
36294 	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
36295 
36296 	  if (optimize
36297 	      || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
36298 	      || num_memory > 1)
36299 	    op = force_reg (mode, op);
36300 	}
36301 
36302       args[i].op = op;
36303       args[i].mode = mode;
36304     }
36305 
36306   switch (nargs)
36307     {
36308     case 1:
36309       pat = GEN_FCN (icode) (target, args[0].op);
36310       break;
36311 
36312     case 2:
36313       if (tf_p)
36314 	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
36315 			       GEN_INT ((int)sub_code));
36316       else if (! comparison_p)
36317 	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
36318       else
36319 	{
36320 	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
36321 				       args[0].op,
36322 				       args[1].op);
36323 
36324 	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
36325 	}
36326       break;
36327 
36328     case 3:
36329       pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36330       break;
36331 
36332     case 4:
36333       pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
36334       break;
36335 
36336     default:
36337       gcc_unreachable ();
36338     }
36339 
36340   if (! pat)
36341     return 0;
36342 
36343   emit_insn (pat);
36344   return target;
36345 }
36346 
36347 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
36348    insns with vec_merge.  */
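/* E.g. __builtin_ia32_rcpss behind _mm_rcp_ss: the insn computes the
   scalar result and vec_merges it into a copy of the source, which is
   why op0 is also passed as the merge operand op1 below.  */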
36349 
36350 static rtx
36351 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
36352 				    rtx target)
36353 {
36354   rtx pat;
36355   tree arg0 = CALL_EXPR_ARG (exp, 0);
36356   rtx op1, op0 = expand_normal (arg0);
36357   machine_mode tmode = insn_data[icode].operand[0].mode;
36358   machine_mode mode0 = insn_data[icode].operand[1].mode;
36359 
36360   if (optimize || !target
36361       || GET_MODE (target) != tmode
36362       || !insn_data[icode].operand[0].predicate (target, tmode))
36363     target = gen_reg_rtx (tmode);
36364 
36365   if (VECTOR_MODE_P (mode0))
36366     op0 = safe_vector_operand (op0, mode0);
36367 
36368   if ((optimize && !register_operand (op0, mode0))
36369       || !insn_data[icode].operand[1].predicate (op0, mode0))
36370     op0 = copy_to_mode_reg (mode0, op0);
36371 
36372   op1 = op0;
36373   if (!insn_data[icode].operand[2].predicate (op1, mode0))
36374     op1 = copy_to_mode_reg (mode0, op1);
36375 
36376   pat = GEN_FCN (icode) (target, op0, op1);
36377   if (! pat)
36378     return 0;
36379   emit_insn (pat);
36380   return target;
36381 }
36382 
36383 /* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
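/* E.g. __builtin_ia32_cmpltps maps straight to cmpltps, while
   __builtin_ia32_cmpgtps has no direct encoding and is tabled with a
   ..._SWAP function type so that the operands are exchanged first.  */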
36384 
36385 static rtx
36386 ix86_expand_sse_compare (const struct builtin_description *d,
36387 			 tree exp, rtx target, bool swap)
36388 {
36389   rtx pat;
36390   tree arg0 = CALL_EXPR_ARG (exp, 0);
36391   tree arg1 = CALL_EXPR_ARG (exp, 1);
36392   rtx op0 = expand_normal (arg0);
36393   rtx op1 = expand_normal (arg1);
36394   rtx op2;
36395   machine_mode tmode = insn_data[d->icode].operand[0].mode;
36396   machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36397   machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36398   enum rtx_code comparison = d->comparison;
36399 
36400   if (VECTOR_MODE_P (mode0))
36401     op0 = safe_vector_operand (op0, mode0);
36402   if (VECTOR_MODE_P (mode1))
36403     op1 = safe_vector_operand (op1, mode1);
36404 
36405   /* Swap operands if we have a comparison that isn't available in
36406      hardware.  */
36407   if (swap)
36408     std::swap (op0, op1);
36409 
36410   if (optimize || !target
36411       || GET_MODE (target) != tmode
36412       || !insn_data[d->icode].operand[0].predicate (target, tmode))
36413     target = gen_reg_rtx (tmode);
36414 
36415   if ((optimize && !register_operand (op0, mode0))
36416       || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36417     op0 = copy_to_mode_reg (mode0, op0);
36418   if ((optimize && !register_operand (op1, mode1))
36419       || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36420     op1 = copy_to_mode_reg (mode1, op1);
36421 
36422   op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36423   pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36424   if (! pat)
36425     return 0;
36426   emit_insn (pat);
36427   return target;
36428 }
36429 
36430 /* Subroutine of ix86_expand_builtin to take care of comi insns.  */
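/* E.g. __builtin_ia32_comieq behind _mm_comieq_ss: comiss sets the
   flags, and the STRICT_LOW_PART store below turns the recorded
   comparison on those flags into a 0/1 value.  */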
36431 
36432 static rtx
36433 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36434 		      rtx target)
36435 {
36436   rtx pat;
36437   tree arg0 = CALL_EXPR_ARG (exp, 0);
36438   tree arg1 = CALL_EXPR_ARG (exp, 1);
36439   rtx op0 = expand_normal (arg0);
36440   rtx op1 = expand_normal (arg1);
36441   machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36442   machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36443   enum rtx_code comparison = d->comparison;
36444 
36445   if (VECTOR_MODE_P (mode0))
36446     op0 = safe_vector_operand (op0, mode0);
36447   if (VECTOR_MODE_P (mode1))
36448     op1 = safe_vector_operand (op1, mode1);
36449 
36450   /* Swap operands if we have a comparison that isn't available in
36451      hardware.  */
36452   if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36453     std::swap (op0, op1);
36454 
36455   target = gen_reg_rtx (SImode);
36456   emit_move_insn (target, const0_rtx);
36457   target = gen_rtx_SUBREG (QImode, target, 0);
36458 
36459   if ((optimize && !register_operand (op0, mode0))
36460       || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36461     op0 = copy_to_mode_reg (mode0, op0);
36462   if ((optimize && !register_operand (op1, mode1))
36463       || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36464     op1 = copy_to_mode_reg (mode1, op1);
36465 
36466   pat = GEN_FCN (d->icode) (op0, op1);
36467   if (! pat)
36468     return 0;
36469   emit_insn (pat);
36470   emit_insn (gen_rtx_SET (VOIDmode,
36471 			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36472 			  gen_rtx_fmt_ee (comparison, QImode,
36473 					  SET_DEST (pat),
36474 					  const0_rtx)));
36475 
36476   return SUBREG_REG (target);
36477 }
36478 
36479 /* Subroutines of ix86_expand_args_builtin to take care of round insns.  */
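/* For these descriptors d->comparison is repurposed to carry the
   rounding immediate; e.g. __builtin_ia32_floorpd is tabled with
   ROUND_FLOOR and expands to roundpd with that constant as the last
   operand.  */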
36480 
36481 static rtx
36482 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36483 		       rtx target)
36484 {
36485   rtx pat;
36486   tree arg0 = CALL_EXPR_ARG (exp, 0);
36487   rtx op1, op0 = expand_normal (arg0);
36488   machine_mode tmode = insn_data[d->icode].operand[0].mode;
36489   machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36490 
36491   if (optimize || target == 0
36492       || GET_MODE (target) != tmode
36493       || !insn_data[d->icode].operand[0].predicate (target, tmode))
36494     target = gen_reg_rtx (tmode);
36495 
36496   if (VECTOR_MODE_P (mode0))
36497     op0 = safe_vector_operand (op0, mode0);
36498 
36499   if ((optimize && !register_operand (op0, mode0))
36500       || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36501     op0 = copy_to_mode_reg (mode0, op0);
36502 
36503   op1 = GEN_INT (d->comparison);
36504 
36505   pat = GEN_FCN (d->icode) (target, op0, op1);
36506   if (! pat)
36507     return 0;
36508   emit_insn (pat);
36509   return target;
36510 }
36511 
36512 static rtx
36513 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36514 				     tree exp, rtx target)
36515 {
36516   rtx pat;
36517   tree arg0 = CALL_EXPR_ARG (exp, 0);
36518   tree arg1 = CALL_EXPR_ARG (exp, 1);
36519   rtx op0 = expand_normal (arg0);
36520   rtx op1 = expand_normal (arg1);
36521   rtx op2;
36522   machine_mode tmode = insn_data[d->icode].operand[0].mode;
36523   machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36524   machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36525 
36526   if (optimize || target == 0
36527       || GET_MODE (target) != tmode
36528       || !insn_data[d->icode].operand[0].predicate (target, tmode))
36529     target = gen_reg_rtx (tmode);
36530 
36531   op0 = safe_vector_operand (op0, mode0);
36532   op1 = safe_vector_operand (op1, mode1);
36533 
36534   if ((optimize && !register_operand (op0, mode0))
36535       || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36536     op0 = copy_to_mode_reg (mode0, op0);
36537   if ((optimize && !register_operand (op1, mode1))
36538       || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36539     op1 = copy_to_mode_reg (mode1, op1);
36540 
36541   op2 = GEN_INT (d->comparison);
36542 
36543   pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36544   if (! pat)
36545     return 0;
36546   emit_insn (pat);
36547   return target;
36548 }
36549 
36550 /* Subroutine of ix86_expand_builtin to take care of ptest insns.  */
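/* E.g. __builtin_ia32_ptestz128 behind _mm_testz_si128: ptest only sets
   flags, so the result is read back through the comparison recorded in
   d->comparison and materialized below.  */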
36551 
36552 static rtx
36553 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36554 		       rtx target)
36555 {
36556   rtx pat;
36557   tree arg0 = CALL_EXPR_ARG (exp, 0);
36558   tree arg1 = CALL_EXPR_ARG (exp, 1);
36559   rtx op0 = expand_normal (arg0);
36560   rtx op1 = expand_normal (arg1);
36561   machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36562   machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36563   enum rtx_code comparison = d->comparison;
36564 
36565   if (VECTOR_MODE_P (mode0))
36566     op0 = safe_vector_operand (op0, mode0);
36567   if (VECTOR_MODE_P (mode1))
36568     op1 = safe_vector_operand (op1, mode1);
36569 
36570   target = gen_reg_rtx (SImode);
36571   emit_move_insn (target, const0_rtx);
36572   target = gen_rtx_SUBREG (QImode, target, 0);
36573 
36574   if ((optimize && !register_operand (op0, mode0))
36575       || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36576     op0 = copy_to_mode_reg (mode0, op0);
36577   if ((optimize && !register_operand (op1, mode1))
36578       || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36579     op1 = copy_to_mode_reg (mode1, op1);
36580 
36581   pat = GEN_FCN (d->icode) (op0, op1);
36582   if (! pat)
36583     return 0;
36584   emit_insn (pat);
36585   emit_insn (gen_rtx_SET (VOIDmode,
36586 			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36587 			  gen_rtx_fmt_ee (comparison, QImode,
36588 					  SET_DEST (pat),
36589 					  const0_rtx)));
36590 
36591   return SUBREG_REG (target);
36592 }
36593 
36594 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */
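/* One pattern serves three result flavors: __builtin_ia32_pcmpestri128
   wants the index (operand 0), __builtin_ia32_pcmpestrm128 wants the
   mask (operand 1), and the flag variants such as
   __builtin_ia32_pcmpestriz128 read a single flags bit selected by
   d->flag.  */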
36595 
36596 static rtx
36597 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36598 			  tree exp, rtx target)
36599 {
36600   rtx pat;
36601   tree arg0 = CALL_EXPR_ARG (exp, 0);
36602   tree arg1 = CALL_EXPR_ARG (exp, 1);
36603   tree arg2 = CALL_EXPR_ARG (exp, 2);
36604   tree arg3 = CALL_EXPR_ARG (exp, 3);
36605   tree arg4 = CALL_EXPR_ARG (exp, 4);
36606   rtx scratch0, scratch1;
36607   rtx op0 = expand_normal (arg0);
36608   rtx op1 = expand_normal (arg1);
36609   rtx op2 = expand_normal (arg2);
36610   rtx op3 = expand_normal (arg3);
36611   rtx op4 = expand_normal (arg4);
36612   machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36613 
36614   tmode0 = insn_data[d->icode].operand[0].mode;
36615   tmode1 = insn_data[d->icode].operand[1].mode;
36616   modev2 = insn_data[d->icode].operand[2].mode;
36617   modei3 = insn_data[d->icode].operand[3].mode;
36618   modev4 = insn_data[d->icode].operand[4].mode;
36619   modei5 = insn_data[d->icode].operand[5].mode;
36620   modeimm = insn_data[d->icode].operand[6].mode;
36621 
36622   if (VECTOR_MODE_P (modev2))
36623     op0 = safe_vector_operand (op0, modev2);
36624   if (VECTOR_MODE_P (modev4))
36625     op2 = safe_vector_operand (op2, modev4);
36626 
36627   if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36628     op0 = copy_to_mode_reg (modev2, op0);
36629   if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36630     op1 = copy_to_mode_reg (modei3, op1);
36631   if ((optimize && !register_operand (op2, modev4))
36632       || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36633     op2 = copy_to_mode_reg (modev4, op2);
36634   if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36635     op3 = copy_to_mode_reg (modei5, op3);
36636 
36637   if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36638     {
36639       error ("the fifth argument must be an 8-bit immediate");
36640       return const0_rtx;
36641     }
36642 
36643   if (d->code == IX86_BUILTIN_PCMPESTRI128)
36644     {
36645       if (optimize || !target
36646 	  || GET_MODE (target) != tmode0
36647 	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36648 	target = gen_reg_rtx (tmode0);
36649 
36650       scratch1 = gen_reg_rtx (tmode1);
36651 
36652       pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36653     }
36654   else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36655     {
36656       if (optimize || !target
36657 	  || GET_MODE (target) != tmode1
36658 	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36659 	target = gen_reg_rtx (tmode1);
36660 
36661       scratch0 = gen_reg_rtx (tmode0);
36662 
36663       pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36664     }
36665   else
36666     {
36667       gcc_assert (d->flag);
36668 
36669       scratch0 = gen_reg_rtx (tmode0);
36670       scratch1 = gen_reg_rtx (tmode1);
36671 
36672       pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36673     }
36674 
36675   if (! pat)
36676     return 0;
36677 
36678   emit_insn (pat);
36679 
36680   if (d->flag)
36681     {
36682       target = gen_reg_rtx (SImode);
36683       emit_move_insn (target, const0_rtx);
36684       target = gen_rtx_SUBREG (QImode, target, 0);
36685 
36686       emit_insn
36687 	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36688 		      gen_rtx_fmt_ee (EQ, QImode,
36689 				      gen_rtx_REG ((machine_mode) d->flag,
36690 						   FLAGS_REG),
36691 				      const0_rtx)));
36692       return SUBREG_REG (target);
36693     }
36694   else
36695     return target;
36696 }
36697 
36698 
36699 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */
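/* The implicit-length twin of the pcmpestr expander above; the two
   explicit length operands (op1 and op3 there) simply drop out.  */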
36700 
36701 static rtx
36702 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36703 			  tree exp, rtx target)
36704 {
36705   rtx pat;
36706   tree arg0 = CALL_EXPR_ARG (exp, 0);
36707   tree arg1 = CALL_EXPR_ARG (exp, 1);
36708   tree arg2 = CALL_EXPR_ARG (exp, 2);
36709   rtx scratch0, scratch1;
36710   rtx op0 = expand_normal (arg0);
36711   rtx op1 = expand_normal (arg1);
36712   rtx op2 = expand_normal (arg2);
36713   machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36714 
36715   tmode0 = insn_data[d->icode].operand[0].mode;
36716   tmode1 = insn_data[d->icode].operand[1].mode;
36717   modev2 = insn_data[d->icode].operand[2].mode;
36718   modev3 = insn_data[d->icode].operand[3].mode;
36719   modeimm = insn_data[d->icode].operand[4].mode;
36720 
36721   if (VECTOR_MODE_P (modev2))
36722     op0 = safe_vector_operand (op0, modev2);
36723   if (VECTOR_MODE_P (modev3))
36724     op1 = safe_vector_operand (op1, modev3);
36725 
36726   if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36727     op0 = copy_to_mode_reg (modev2, op0);
36728   if ((optimize && !register_operand (op1, modev3))
36729       || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36730     op1 = copy_to_mode_reg (modev3, op1);
36731 
36732   if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36733     {
36734       error ("the third argument must be an 8-bit immediate");
36735       return const0_rtx;
36736     }
36737 
36738   if (d->code == IX86_BUILTIN_PCMPISTRI128)
36739     {
36740       if (optimize || !target
36741 	  || GET_MODE (target) != tmode0
36742 	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36743 	target = gen_reg_rtx (tmode0);
36744 
36745       scratch1 = gen_reg_rtx (tmode1);
36746 
36747       pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36748     }
36749   else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36750     {
36751       if (optimize || !target
36752 	  || GET_MODE (target) != tmode1
36753 	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36754 	target = gen_reg_rtx (tmode1);
36755 
36756       scratch0 = gen_reg_rtx (tmode0);
36757 
36758       pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36759     }
36760   else
36761     {
36762       gcc_assert (d->flag);
36763 
36764       scratch0 = gen_reg_rtx (tmode0);
36765       scratch1 = gen_reg_rtx (tmode1);
36766 
36767       pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36768     }
36769 
36770   if (! pat)
36771     return 0;
36772 
36773   emit_insn (pat);
36774 
36775   if (d->flag)
36776     {
36777       target = gen_reg_rtx (SImode);
36778       emit_move_insn (target, const0_rtx);
36779       target = gen_rtx_SUBREG (QImode, target, 0);
36780 
36781       emit_insn
36782 	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36783 		      gen_rtx_fmt_ee (EQ, QImode,
36784 				      gen_rtx_REG ((machine_mode) d->flag,
36785 						   FLAGS_REG),
36786 				      const0_rtx)));
36787       return SUBREG_REG (target);
36788     }
36789   else
36790     return target;
36791 }
36792 
36793 /* Subroutine of ix86_expand_builtin to take care of insns with
36794    variable number of operands.  */
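/* The dispatch below keys off the ix86_builtin_func_type stored in
   d->flag: it determines nargs, how many trailing arguments must be
   immediates (nargs_constant), where a mask operand sits (mask_pos),
   and whether the insn really computes in a wider mode (rmode), as the
   byte-wise V2DI shifts do in V1TImode.  */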
36795 
36796 static rtx
36797 ix86_expand_args_builtin (const struct builtin_description *d,
36798 			  tree exp, rtx target)
36799 {
36800   rtx pat, real_target;
36801   unsigned int i, nargs;
36802   unsigned int nargs_constant = 0;
36803   unsigned int mask_pos = 0;
36804   int num_memory = 0;
36805   struct
36806     {
36807       rtx op;
36808       machine_mode mode;
36809     } args[6];
36810   bool last_arg_count = false;
36811   enum insn_code icode = d->icode;
36812   const struct insn_data_d *insn_p = &insn_data[icode];
36813   machine_mode tmode = insn_p->operand[0].mode;
36814   machine_mode rmode = VOIDmode;
36815   bool swap = false;
36816   enum rtx_code comparison = d->comparison;
36817 
36818   switch ((enum ix86_builtin_func_type) d->flag)
36819     {
36820     case V2DF_FTYPE_V2DF_ROUND:
36821     case V4DF_FTYPE_V4DF_ROUND:
36822     case V4SF_FTYPE_V4SF_ROUND:
36823     case V8SF_FTYPE_V8SF_ROUND:
36824     case V4SI_FTYPE_V4SF_ROUND:
36825     case V8SI_FTYPE_V8SF_ROUND:
36826       return ix86_expand_sse_round (d, exp, target);
36827     case V4SI_FTYPE_V2DF_V2DF_ROUND:
36828     case V8SI_FTYPE_V4DF_V4DF_ROUND:
36829     case V16SI_FTYPE_V8DF_V8DF_ROUND:
36830       return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36831     case INT_FTYPE_V8SF_V8SF_PTEST:
36832     case INT_FTYPE_V4DI_V4DI_PTEST:
36833     case INT_FTYPE_V4DF_V4DF_PTEST:
36834     case INT_FTYPE_V4SF_V4SF_PTEST:
36835     case INT_FTYPE_V2DI_V2DI_PTEST:
36836     case INT_FTYPE_V2DF_V2DF_PTEST:
36837       return ix86_expand_sse_ptest (d, exp, target);
36838     case FLOAT128_FTYPE_FLOAT128:
36839     case FLOAT_FTYPE_FLOAT:
36840     case INT_FTYPE_INT:
36841     case UINT64_FTYPE_INT:
36842     case UINT16_FTYPE_UINT16:
36843     case INT64_FTYPE_INT64:
36844     case INT64_FTYPE_V4SF:
36845     case INT64_FTYPE_V2DF:
36846     case INT_FTYPE_V16QI:
36847     case INT_FTYPE_V8QI:
36848     case INT_FTYPE_V8SF:
36849     case INT_FTYPE_V4DF:
36850     case INT_FTYPE_V4SF:
36851     case INT_FTYPE_V2DF:
36852     case INT_FTYPE_V32QI:
36853     case V16QI_FTYPE_V16QI:
36854     case V8SI_FTYPE_V8SF:
36855     case V8SI_FTYPE_V4SI:
36856     case V8HI_FTYPE_V8HI:
36857     case V8HI_FTYPE_V16QI:
36858     case V8QI_FTYPE_V8QI:
36859     case V8SF_FTYPE_V8SF:
36860     case V8SF_FTYPE_V8SI:
36861     case V8SF_FTYPE_V4SF:
36862     case V8SF_FTYPE_V8HI:
36863     case V4SI_FTYPE_V4SI:
36864     case V4SI_FTYPE_V16QI:
36865     case V4SI_FTYPE_V4SF:
36866     case V4SI_FTYPE_V8SI:
36867     case V4SI_FTYPE_V8HI:
36868     case V4SI_FTYPE_V4DF:
36869     case V4SI_FTYPE_V2DF:
36870     case V4HI_FTYPE_V4HI:
36871     case V4DF_FTYPE_V4DF:
36872     case V4DF_FTYPE_V4SI:
36873     case V4DF_FTYPE_V4SF:
36874     case V4DF_FTYPE_V2DF:
36875     case V4SF_FTYPE_V4SF:
36876     case V4SF_FTYPE_V4SI:
36877     case V4SF_FTYPE_V8SF:
36878     case V4SF_FTYPE_V4DF:
36879     case V4SF_FTYPE_V8HI:
36880     case V4SF_FTYPE_V2DF:
36881     case V2DI_FTYPE_V2DI:
36882     case V2DI_FTYPE_V16QI:
36883     case V2DI_FTYPE_V8HI:
36884     case V2DI_FTYPE_V4SI:
36885     case V2DF_FTYPE_V2DF:
36886     case V2DF_FTYPE_V4SI:
36887     case V2DF_FTYPE_V4DF:
36888     case V2DF_FTYPE_V4SF:
36889     case V2DF_FTYPE_V2SI:
36890     case V2SI_FTYPE_V2SI:
36891     case V2SI_FTYPE_V4SF:
36892     case V2SI_FTYPE_V2SF:
36893     case V2SI_FTYPE_V2DF:
36894     case V2SF_FTYPE_V2SF:
36895     case V2SF_FTYPE_V2SI:
36896     case V32QI_FTYPE_V32QI:
36897     case V32QI_FTYPE_V16QI:
36898     case V16HI_FTYPE_V16HI:
36899     case V16HI_FTYPE_V8HI:
36900     case V8SI_FTYPE_V8SI:
36901     case V16HI_FTYPE_V16QI:
36902     case V8SI_FTYPE_V16QI:
36903     case V4DI_FTYPE_V16QI:
36904     case V8SI_FTYPE_V8HI:
36905     case V4DI_FTYPE_V8HI:
36906     case V4DI_FTYPE_V4SI:
36907     case V4DI_FTYPE_V2DI:
36908     case HI_FTYPE_HI:
36909     case HI_FTYPE_V16QI:
36910     case SI_FTYPE_V32QI:
36911     case DI_FTYPE_V64QI:
36912     case V16QI_FTYPE_HI:
36913     case V32QI_FTYPE_SI:
36914     case V64QI_FTYPE_DI:
36915     case V8HI_FTYPE_QI:
36916     case V16HI_FTYPE_HI:
36917     case V32HI_FTYPE_SI:
36918     case V4SI_FTYPE_QI:
36919     case V8SI_FTYPE_QI:
36920     case V4SI_FTYPE_HI:
36921     case V8SI_FTYPE_HI:
36922     case QI_FTYPE_V8HI:
36923     case HI_FTYPE_V16HI:
36924     case SI_FTYPE_V32HI:
36925     case QI_FTYPE_V4SI:
36926     case QI_FTYPE_V8SI:
36927     case HI_FTYPE_V16SI:
36928     case QI_FTYPE_V2DI:
36929     case QI_FTYPE_V4DI:
36930     case QI_FTYPE_V8DI:
36931     case UINT_FTYPE_V2DF:
36932     case UINT_FTYPE_V4SF:
36933     case UINT64_FTYPE_V2DF:
36934     case UINT64_FTYPE_V4SF:
36935     case V16QI_FTYPE_V8DI:
36936     case V16HI_FTYPE_V16SI:
36937     case V16SI_FTYPE_HI:
36938     case V2DI_FTYPE_QI:
36939     case V4DI_FTYPE_QI:
36940     case V16SI_FTYPE_V16SI:
36941     case V16SI_FTYPE_INT:
36942     case V16SF_FTYPE_FLOAT:
36943     case V16SF_FTYPE_V8SF:
36944     case V16SI_FTYPE_V8SI:
36945     case V16SF_FTYPE_V4SF:
36946     case V16SI_FTYPE_V4SI:
36947     case V16SF_FTYPE_V16SF:
36948     case V8HI_FTYPE_V8DI:
36949     case V8UHI_FTYPE_V8UHI:
36950     case V8SI_FTYPE_V8DI:
36951     case V8SF_FTYPE_V8DF:
36952     case V8DI_FTYPE_QI:
36953     case V8DI_FTYPE_INT64:
36954     case V8DI_FTYPE_V4DI:
36955     case V8DI_FTYPE_V8DI:
36956     case V8DF_FTYPE_DOUBLE:
36957     case V8DF_FTYPE_V4DF:
36958     case V8DF_FTYPE_V2DF:
36959     case V8DF_FTYPE_V8DF:
36960     case V8DF_FTYPE_V8SI:
36961       nargs = 1;
36962       break;
36963     case V4SF_FTYPE_V4SF_VEC_MERGE:
36964     case V2DF_FTYPE_V2DF_VEC_MERGE:
36965       return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36966     case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36967     case V16QI_FTYPE_V16QI_V16QI:
36968     case V16QI_FTYPE_V8HI_V8HI:
36969     case V16SI_FTYPE_V16SI_V16SI:
36970     case V16SF_FTYPE_V16SF_V16SF:
36971     case V16SF_FTYPE_V16SF_V16SI:
36972     case V8QI_FTYPE_V8QI_V8QI:
36973     case V8QI_FTYPE_V4HI_V4HI:
36974     case V8HI_FTYPE_V8HI_V8HI:
36975     case V8HI_FTYPE_V16QI_V16QI:
36976     case V8HI_FTYPE_V4SI_V4SI:
36977     case V8SF_FTYPE_V8SF_V8SF:
36978     case V8SF_FTYPE_V8SF_V8SI:
36979     case V8DI_FTYPE_V8DI_V8DI:
36980     case V8DF_FTYPE_V8DF_V8DF:
36981     case V8DF_FTYPE_V8DF_V8DI:
36982     case V4SI_FTYPE_V4SI_V4SI:
36983     case V4SI_FTYPE_V8HI_V8HI:
36984     case V4SI_FTYPE_V4SF_V4SF:
36985     case V4SI_FTYPE_V2DF_V2DF:
36986     case V4HI_FTYPE_V4HI_V4HI:
36987     case V4HI_FTYPE_V8QI_V8QI:
36988     case V4HI_FTYPE_V2SI_V2SI:
36989     case V4DF_FTYPE_V4DF_V4DF:
36990     case V4DF_FTYPE_V4DF_V4DI:
36991     case V4SF_FTYPE_V4SF_V4SF:
36992     case V4SF_FTYPE_V4SF_V4SI:
36993     case V4SF_FTYPE_V4SF_V2SI:
36994     case V4SF_FTYPE_V4SF_V2DF:
36995     case V4SF_FTYPE_V4SF_UINT:
36996     case V4SF_FTYPE_V4SF_UINT64:
36997     case V4SF_FTYPE_V4SF_DI:
36998     case V4SF_FTYPE_V4SF_SI:
36999     case V2DI_FTYPE_V2DI_V2DI:
37000     case V2DI_FTYPE_V16QI_V16QI:
37001     case V2DI_FTYPE_V4SI_V4SI:
37002     case V2UDI_FTYPE_V4USI_V4USI:
37003     case V2DI_FTYPE_V2DI_V16QI:
37004     case V2DI_FTYPE_V2DF_V2DF:
37005     case V2SI_FTYPE_V2SI_V2SI:
37006     case V2SI_FTYPE_V4HI_V4HI:
37007     case V2SI_FTYPE_V2SF_V2SF:
37008     case V2DF_FTYPE_V2DF_V2DF:
37009     case V2DF_FTYPE_V2DF_V4SF:
37010     case V2DF_FTYPE_V2DF_V2DI:
37011     case V2DF_FTYPE_V2DF_DI:
37012     case V2DF_FTYPE_V2DF_SI:
37013     case V2DF_FTYPE_V2DF_UINT:
37014     case V2DF_FTYPE_V2DF_UINT64:
37015     case V2SF_FTYPE_V2SF_V2SF:
37016     case V1DI_FTYPE_V1DI_V1DI:
37017     case V1DI_FTYPE_V8QI_V8QI:
37018     case V1DI_FTYPE_V2SI_V2SI:
37019     case V32QI_FTYPE_V16HI_V16HI:
37020     case V16HI_FTYPE_V8SI_V8SI:
37021     case V32QI_FTYPE_V32QI_V32QI:
37022     case V16HI_FTYPE_V32QI_V32QI:
37023     case V16HI_FTYPE_V16HI_V16HI:
37024     case V8SI_FTYPE_V4DF_V4DF:
37025     case V8SI_FTYPE_V8SI_V8SI:
37026     case V8SI_FTYPE_V16HI_V16HI:
37027     case V4DI_FTYPE_V4DI_V4DI:
37028     case V4DI_FTYPE_V8SI_V8SI:
37029     case V4UDI_FTYPE_V8USI_V8USI:
37030     case QI_FTYPE_V8DI_V8DI:
37031     case V8DI_FTYPE_V64QI_V64QI:
37032     case HI_FTYPE_V16SI_V16SI:
37033       if (comparison == UNKNOWN)
37034 	return ix86_expand_binop_builtin (icode, exp, target);
37035       nargs = 2;
37036       break;
37037     case V4SF_FTYPE_V4SF_V4SF_SWAP:
37038     case V2DF_FTYPE_V2DF_V2DF_SWAP:
37039       gcc_assert (comparison != UNKNOWN);
37040       nargs = 2;
37041       swap = true;
37042       break;
37043     case V16HI_FTYPE_V16HI_V8HI_COUNT:
37044     case V16HI_FTYPE_V16HI_SI_COUNT:
37045     case V8SI_FTYPE_V8SI_V4SI_COUNT:
37046     case V8SI_FTYPE_V8SI_SI_COUNT:
37047     case V4DI_FTYPE_V4DI_V2DI_COUNT:
37048     case V4DI_FTYPE_V4DI_INT_COUNT:
37049     case V8HI_FTYPE_V8HI_V8HI_COUNT:
37050     case V8HI_FTYPE_V8HI_SI_COUNT:
37051     case V4SI_FTYPE_V4SI_V4SI_COUNT:
37052     case V4SI_FTYPE_V4SI_SI_COUNT:
37053     case V4HI_FTYPE_V4HI_V4HI_COUNT:
37054     case V4HI_FTYPE_V4HI_SI_COUNT:
37055     case V2DI_FTYPE_V2DI_V2DI_COUNT:
37056     case V2DI_FTYPE_V2DI_SI_COUNT:
37057     case V2SI_FTYPE_V2SI_V2SI_COUNT:
37058     case V2SI_FTYPE_V2SI_SI_COUNT:
37059     case V1DI_FTYPE_V1DI_V1DI_COUNT:
37060     case V1DI_FTYPE_V1DI_SI_COUNT:
37061       nargs = 2;
37062       last_arg_count = true;
37063       break;
37064     case UINT64_FTYPE_UINT64_UINT64:
37065     case UINT_FTYPE_UINT_UINT:
37066     case UINT_FTYPE_UINT_USHORT:
37067     case UINT_FTYPE_UINT_UCHAR:
37068     case UINT16_FTYPE_UINT16_INT:
37069     case UINT8_FTYPE_UINT8_INT:
37070     case HI_FTYPE_HI_HI:
37071     case SI_FTYPE_SI_SI:
37072     case DI_FTYPE_DI_DI:
37073     case V16SI_FTYPE_V8DF_V8DF:
37074       nargs = 2;
37075       break;
37076     case V2DI_FTYPE_V2DI_INT_CONVERT:
37077       nargs = 2;
37078       rmode = V1TImode;
37079       nargs_constant = 1;
37080       break;
37081     case V4DI_FTYPE_V4DI_INT_CONVERT:
37082       nargs = 2;
37083       rmode = V2TImode;
37084       nargs_constant = 1;
37085       break;
37086     case V8DI_FTYPE_V8DI_INT_CONVERT:
37087       nargs = 2;
37088       rmode = V4TImode;
37089       nargs_constant = 1;
37090       break;
37091     case V8HI_FTYPE_V8HI_INT:
37092     case V8HI_FTYPE_V8SF_INT:
37093     case V16HI_FTYPE_V16SF_INT:
37094     case V8HI_FTYPE_V4SF_INT:
37095     case V8SF_FTYPE_V8SF_INT:
37096     case V4SF_FTYPE_V16SF_INT:
37097     case V16SF_FTYPE_V16SF_INT:
37098     case V4SI_FTYPE_V4SI_INT:
37099     case V4SI_FTYPE_V8SI_INT:
37100     case V4HI_FTYPE_V4HI_INT:
37101     case V4DF_FTYPE_V4DF_INT:
37102     case V4DF_FTYPE_V8DF_INT:
37103     case V4SF_FTYPE_V4SF_INT:
37104     case V4SF_FTYPE_V8SF_INT:
37105     case V2DI_FTYPE_V2DI_INT:
37106     case V2DF_FTYPE_V2DF_INT:
37107     case V2DF_FTYPE_V4DF_INT:
37108     case V16HI_FTYPE_V16HI_INT:
37109     case V8SI_FTYPE_V8SI_INT:
37110     case V16SI_FTYPE_V16SI_INT:
37111     case V4SI_FTYPE_V16SI_INT:
37112     case V4DI_FTYPE_V4DI_INT:
37113     case V2DI_FTYPE_V4DI_INT:
37114     case V4DI_FTYPE_V8DI_INT:
37115     case HI_FTYPE_HI_INT:
37116     case QI_FTYPE_V4SF_INT:
37117     case QI_FTYPE_V2DF_INT:
37118       nargs = 2;
37119       nargs_constant = 1;
37120       break;
37121     case V16QI_FTYPE_V16QI_V16QI_V16QI:
37122     case V8SF_FTYPE_V8SF_V8SF_V8SF:
37123     case V4DF_FTYPE_V4DF_V4DF_V4DF:
37124     case V4SF_FTYPE_V4SF_V4SF_V4SF:
37125     case V2DF_FTYPE_V2DF_V2DF_V2DF:
37126     case V32QI_FTYPE_V32QI_V32QI_V32QI:
37127     case HI_FTYPE_V16SI_V16SI_HI:
37128     case QI_FTYPE_V8DI_V8DI_QI:
37129     case V16HI_FTYPE_V16SI_V16HI_HI:
37130     case V16QI_FTYPE_V16SI_V16QI_HI:
37131     case V16QI_FTYPE_V8DI_V16QI_QI:
37132     case V16SF_FTYPE_V16SF_V16SF_HI:
37133     case V16SF_FTYPE_V16SF_V16SF_V16SF:
37134     case V16SF_FTYPE_V16SF_V16SI_V16SF:
37135     case V16SF_FTYPE_V16SI_V16SF_HI:
37136     case V16SF_FTYPE_V16SI_V16SF_V16SF:
37137     case V16SF_FTYPE_V4SF_V16SF_HI:
37138     case V16SI_FTYPE_SI_V16SI_HI:
37139     case V16SI_FTYPE_V16HI_V16SI_HI:
37140     case V16SI_FTYPE_V16QI_V16SI_HI:
37141     case V16SI_FTYPE_V16SF_V16SI_HI:
37142     case V8SF_FTYPE_V4SF_V8SF_QI:
37143     case V4DF_FTYPE_V2DF_V4DF_QI:
37144     case V8SI_FTYPE_V4SI_V8SI_QI:
37145     case V8SI_FTYPE_SI_V8SI_QI:
37146     case V4SI_FTYPE_V4SI_V4SI_QI:
37147     case V4SI_FTYPE_SI_V4SI_QI:
37148     case V4DI_FTYPE_V2DI_V4DI_QI:
37149     case V4DI_FTYPE_DI_V4DI_QI:
37150     case V2DI_FTYPE_V2DI_V2DI_QI:
37151     case V2DI_FTYPE_DI_V2DI_QI:
37152     case V64QI_FTYPE_V64QI_V64QI_DI:
37153     case V64QI_FTYPE_V16QI_V64QI_DI:
37154     case V64QI_FTYPE_QI_V64QI_DI:
37155     case V32QI_FTYPE_V32QI_V32QI_SI:
37156     case V32QI_FTYPE_V16QI_V32QI_SI:
37157     case V32QI_FTYPE_QI_V32QI_SI:
37158     case V16QI_FTYPE_V16QI_V16QI_HI:
37159     case V16QI_FTYPE_QI_V16QI_HI:
37160     case V32HI_FTYPE_V8HI_V32HI_SI:
37161     case V32HI_FTYPE_HI_V32HI_SI:
37162     case V16HI_FTYPE_V8HI_V16HI_HI:
37163     case V16HI_FTYPE_HI_V16HI_HI:
37164     case V8HI_FTYPE_V8HI_V8HI_QI:
37165     case V8HI_FTYPE_HI_V8HI_QI:
37166     case V8SF_FTYPE_V8HI_V8SF_QI:
37167     case V4SF_FTYPE_V8HI_V4SF_QI:
37168     case V8SI_FTYPE_V8SF_V8SI_QI:
37169     case V4SI_FTYPE_V4SF_V4SI_QI:
37170     case V8DI_FTYPE_V8SF_V8DI_QI:
37171     case V4DI_FTYPE_V4SF_V4DI_QI:
37172     case V2DI_FTYPE_V4SF_V2DI_QI:
37173     case V8SF_FTYPE_V8DI_V8SF_QI:
37174     case V4SF_FTYPE_V4DI_V4SF_QI:
37175     case V4SF_FTYPE_V2DI_V4SF_QI:
37176     case V8DF_FTYPE_V8DI_V8DF_QI:
37177     case V4DF_FTYPE_V4DI_V4DF_QI:
37178     case V2DF_FTYPE_V2DI_V2DF_QI:
37179     case V16QI_FTYPE_V8HI_V16QI_QI:
37180     case V16QI_FTYPE_V16HI_V16QI_HI:
37181     case V16QI_FTYPE_V4SI_V16QI_QI:
37182     case V16QI_FTYPE_V8SI_V16QI_QI:
37183     case V8HI_FTYPE_V4SI_V8HI_QI:
37184     case V8HI_FTYPE_V8SI_V8HI_QI:
37185     case V16QI_FTYPE_V2DI_V16QI_QI:
37186     case V16QI_FTYPE_V4DI_V16QI_QI:
37187     case V8HI_FTYPE_V2DI_V8HI_QI:
37188     case V8HI_FTYPE_V4DI_V8HI_QI:
37189     case V4SI_FTYPE_V2DI_V4SI_QI:
37190     case V4SI_FTYPE_V4DI_V4SI_QI:
37191     case V32QI_FTYPE_V32HI_V32QI_SI:
37192     case HI_FTYPE_V16QI_V16QI_HI:
37193     case SI_FTYPE_V32QI_V32QI_SI:
37194     case DI_FTYPE_V64QI_V64QI_DI:
37195     case QI_FTYPE_V8HI_V8HI_QI:
37196     case HI_FTYPE_V16HI_V16HI_HI:
37197     case SI_FTYPE_V32HI_V32HI_SI:
37198     case QI_FTYPE_V4SI_V4SI_QI:
37199     case QI_FTYPE_V8SI_V8SI_QI:
37200     case QI_FTYPE_V2DI_V2DI_QI:
37201     case QI_FTYPE_V4DI_V4DI_QI:
37202     case V4SF_FTYPE_V2DF_V4SF_QI:
37203     case V4SF_FTYPE_V4DF_V4SF_QI:
37204     case V16SI_FTYPE_V16SI_V16SI_HI:
37205     case V16SI_FTYPE_V16SI_V16SI_V16SI:
37206     case V16SI_FTYPE_V4SI_V16SI_HI:
37207     case V2DI_FTYPE_V2DI_V2DI_V2DI:
37208     case V2DI_FTYPE_V4SI_V2DI_QI:
37209     case V2DI_FTYPE_V8HI_V2DI_QI:
37210     case V2DI_FTYPE_V16QI_V2DI_QI:
37211     case V4DI_FTYPE_V4DI_V4DI_QI:
37212     case V4DI_FTYPE_V4SI_V4DI_QI:
37213     case V4DI_FTYPE_V8HI_V4DI_QI:
37214     case V4DI_FTYPE_V16QI_V4DI_QI:
37215     case V8DI_FTYPE_V8DF_V8DI_QI:
37216     case V4DI_FTYPE_V4DF_V4DI_QI:
37217     case V2DI_FTYPE_V2DF_V2DI_QI:
37218     case V4SI_FTYPE_V4DF_V4SI_QI:
37219     case V4SI_FTYPE_V2DF_V4SI_QI:
37220     case V4SI_FTYPE_V8HI_V4SI_QI:
37221     case V4SI_FTYPE_V16QI_V4SI_QI:
37222     case V8SI_FTYPE_V8SI_V8SI_V8SI:
37223     case V4DI_FTYPE_V4DI_V4DI_V4DI:
37224     case V8DF_FTYPE_V2DF_V8DF_QI:
37225     case V8DF_FTYPE_V4DF_V8DF_QI:
37226     case V8DF_FTYPE_V8DF_V8DF_QI:
37227     case V8DF_FTYPE_V8DF_V8DF_V8DF:
37228     case V8SF_FTYPE_V8SF_V8SF_QI:
37229     case V8SF_FTYPE_V8SI_V8SF_QI:
37230     case V4DF_FTYPE_V4DF_V4DF_QI:
37231     case V4SF_FTYPE_V4SF_V4SF_QI:
37232     case V2DF_FTYPE_V2DF_V2DF_QI:
37233     case V2DF_FTYPE_V4SF_V2DF_QI:
37234     case V2DF_FTYPE_V4SI_V2DF_QI:
37235     case V4SF_FTYPE_V4SI_V4SF_QI:
37236     case V4DF_FTYPE_V4SF_V4DF_QI:
37237     case V4DF_FTYPE_V4SI_V4DF_QI:
37238     case V8SI_FTYPE_V8SI_V8SI_QI:
37239     case V8SI_FTYPE_V8HI_V8SI_QI:
37240     case V8SI_FTYPE_V16QI_V8SI_QI:
37241     case V8DF_FTYPE_V8DF_V8DI_V8DF:
37242     case V8DF_FTYPE_V8DI_V8DF_V8DF:
37243     case V8DF_FTYPE_V8SF_V8DF_QI:
37244     case V8DF_FTYPE_V8SI_V8DF_QI:
37245     case V8DI_FTYPE_DI_V8DI_QI:
37246     case V16SF_FTYPE_V8SF_V16SF_HI:
37247     case V16SI_FTYPE_V8SI_V16SI_HI:
37248     case V16HI_FTYPE_V16HI_V16HI_HI:
37249     case V8HI_FTYPE_V16QI_V8HI_QI:
37250     case V16HI_FTYPE_V16QI_V16HI_HI:
37251     case V32HI_FTYPE_V32HI_V32HI_SI:
37252     case V32HI_FTYPE_V32QI_V32HI_SI:
37253     case V8DI_FTYPE_V16QI_V8DI_QI:
37254     case V8DI_FTYPE_V2DI_V8DI_QI:
37255     case V8DI_FTYPE_V4DI_V8DI_QI:
37256     case V8DI_FTYPE_V8DI_V8DI_QI:
37257     case V8DI_FTYPE_V8DI_V8DI_V8DI:
37258     case V8DI_FTYPE_V8HI_V8DI_QI:
37259     case V8DI_FTYPE_V8SI_V8DI_QI:
37260     case V8HI_FTYPE_V8DI_V8HI_QI:
37261     case V8SF_FTYPE_V8DF_V8SF_QI:
37262     case V8SI_FTYPE_V8DF_V8SI_QI:
37263     case V8SI_FTYPE_V8DI_V8SI_QI:
37264     case V4SI_FTYPE_V4SI_V4SI_V4SI:
37265       nargs = 3;
37266       break;
37267     case V32QI_FTYPE_V32QI_V32QI_INT:
37268     case V16HI_FTYPE_V16HI_V16HI_INT:
37269     case V16QI_FTYPE_V16QI_V16QI_INT:
37270     case V4DI_FTYPE_V4DI_V4DI_INT:
37271     case V8HI_FTYPE_V8HI_V8HI_INT:
37272     case V8SI_FTYPE_V8SI_V8SI_INT:
37273     case V8SI_FTYPE_V8SI_V4SI_INT:
37274     case V8SF_FTYPE_V8SF_V8SF_INT:
37275     case V8SF_FTYPE_V8SF_V4SF_INT:
37276     case V4SI_FTYPE_V4SI_V4SI_INT:
37277     case V4DF_FTYPE_V4DF_V4DF_INT:
37278     case V16SF_FTYPE_V16SF_V16SF_INT:
37279     case V16SF_FTYPE_V16SF_V4SF_INT:
37280     case V16SI_FTYPE_V16SI_V4SI_INT:
37281     case V4DF_FTYPE_V4DF_V2DF_INT:
37282     case V4SF_FTYPE_V4SF_V4SF_INT:
37283     case V2DI_FTYPE_V2DI_V2DI_INT:
37284     case V4DI_FTYPE_V4DI_V2DI_INT:
37285     case V2DF_FTYPE_V2DF_V2DF_INT:
37286     case QI_FTYPE_V8DI_V8DI_INT:
37287     case QI_FTYPE_V8DF_V8DF_INT:
37288     case QI_FTYPE_V2DF_V2DF_INT:
37289     case QI_FTYPE_V4SF_V4SF_INT:
37290     case HI_FTYPE_V16SI_V16SI_INT:
37291     case HI_FTYPE_V16SF_V16SF_INT:
37292       nargs = 3;
37293       nargs_constant = 1;
37294       break;
37295     case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
37296       nargs = 3;
37297       rmode = V4DImode;
37298       nargs_constant = 1;
37299       break;
37300     case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
37301       nargs = 3;
37302       rmode = V2DImode;
37303       nargs_constant = 1;
37304       break;
37305     case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
37306       nargs = 3;
37307       rmode = DImode;
37308       nargs_constant = 1;
37309       break;
37310     case V2DI_FTYPE_V2DI_UINT_UINT:
37311       nargs = 3;
37312       nargs_constant = 2;
37313       break;
37314     case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
37315       nargs = 3;
37316       rmode = V8DImode;
37317       nargs_constant = 1;
37318       break;
37319     case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
37320       nargs = 5;
37321       rmode = V8DImode;
37322       mask_pos = 2;
37323       nargs_constant = 1;
37324       break;
37325     case QI_FTYPE_V8DF_INT_QI:
37326     case QI_FTYPE_V4DF_INT_QI:
37327     case QI_FTYPE_V2DF_INT_QI:
37328     case HI_FTYPE_V16SF_INT_HI:
37329     case QI_FTYPE_V8SF_INT_QI:
37330     case QI_FTYPE_V4SF_INT_QI:
37331       nargs = 3;
37332       mask_pos = 1;
37333       nargs_constant = 1;
37334       break;
37335     case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
37336       nargs = 5;
37337       rmode = V4DImode;
37338       mask_pos = 2;
37339       nargs_constant = 1;
37340       break;
37341     case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
37342       nargs = 5;
37343       rmode = V2DImode;
37344       mask_pos = 2;
37345       nargs_constant = 1;
37346       break;
37347     case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
37348     case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
37349     case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
37350     case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
37351     case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
37352     case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
37353     case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
37354     case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
37355     case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
37356     case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
37357     case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
37358     case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
37359     case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
37360     case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
37361     case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
37362     case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
37363     case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
37364     case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
37365     case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
37366     case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
37367     case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
37368     case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
37369     case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
37370     case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
37371     case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
37372     case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
37373     case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
37374     case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
37375     case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
37376     case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
37377     case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
37378     case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
37379     case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
37380     case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
37381     case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
37382     case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
37383     case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
37384     case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
37385     case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
37386     case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
37387     case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
37388     case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
37389     case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
37390     case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
37391     case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
37392     case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
37393     case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
37394     case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
37395     case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
37396     case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
37397     case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
37398     case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
37399     case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
37400     case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
37401       nargs = 4;
37402       break;
37403     case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
37404     case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37405     case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37406     case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37407     case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37408       nargs = 4;
37409       nargs_constant = 1;
37410       break;
37411     case QI_FTYPE_V4DI_V4DI_INT_QI:
37412     case QI_FTYPE_V8SI_V8SI_INT_QI:
37413     case QI_FTYPE_V4DF_V4DF_INT_QI:
37414     case QI_FTYPE_V8SF_V8SF_INT_QI:
37415     case QI_FTYPE_V2DI_V2DI_INT_QI:
37416     case QI_FTYPE_V4SI_V4SI_INT_QI:
37417     case QI_FTYPE_V2DF_V2DF_INT_QI:
37418     case QI_FTYPE_V4SF_V4SF_INT_QI:
37419     case DI_FTYPE_V64QI_V64QI_INT_DI:
37420     case SI_FTYPE_V32QI_V32QI_INT_SI:
37421     case HI_FTYPE_V16QI_V16QI_INT_HI:
37422     case SI_FTYPE_V32HI_V32HI_INT_SI:
37423     case HI_FTYPE_V16HI_V16HI_INT_HI:
37424     case QI_FTYPE_V8HI_V8HI_INT_QI:
37425       nargs = 4;
37426       mask_pos = 1;
37427       nargs_constant = 1;
37428       break;
37429     case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37430       nargs = 4;
37431       nargs_constant = 2;
37432       break;
37433     case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37434     case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37435       nargs = 4;
37436       break;
37437     case QI_FTYPE_V8DI_V8DI_INT_QI:
37438     case HI_FTYPE_V16SI_V16SI_INT_HI:
37439     case QI_FTYPE_V8DF_V8DF_INT_QI:
37440     case HI_FTYPE_V16SF_V16SF_INT_HI:
37441       mask_pos = 1;
37442       nargs = 4;
37443       nargs_constant = 1;
37444       break;
37445     case V8SF_FTYPE_V8SF_INT_V8SF_QI:
37446     case V4SF_FTYPE_V4SF_INT_V4SF_QI:
37447     case V2DF_FTYPE_V4DF_INT_V2DF_QI:
37448     case V2DI_FTYPE_V4DI_INT_V2DI_QI:
37449     case V8SF_FTYPE_V16SF_INT_V8SF_QI:
37450     case V8SI_FTYPE_V16SI_INT_V8SI_QI:
37451     case V2DF_FTYPE_V8DF_INT_V2DF_QI:
37452     case V2DI_FTYPE_V8DI_INT_V2DI_QI:
37453     case V4SF_FTYPE_V8SF_INT_V4SF_QI:
37454     case V4SI_FTYPE_V8SI_INT_V4SI_QI:
37455     case V8HI_FTYPE_V8SF_INT_V8HI_QI:
37456     case V8HI_FTYPE_V4SF_INT_V8HI_QI:
37457     case V32HI_FTYPE_V32HI_INT_V32HI_SI:
37458     case V16HI_FTYPE_V16HI_INT_V16HI_HI:
37459     case V8HI_FTYPE_V8HI_INT_V8HI_QI:
37460     case V4DI_FTYPE_V4DI_INT_V4DI_QI:
37461     case V2DI_FTYPE_V2DI_INT_V2DI_QI:
37462     case V8SI_FTYPE_V8SI_INT_V8SI_QI:
37463     case V4SI_FTYPE_V4SI_INT_V4SI_QI:
37464     case V4DF_FTYPE_V4DF_INT_V4DF_QI:
37465     case V2DF_FTYPE_V2DF_INT_V2DF_QI:
37466     case V8DF_FTYPE_V8DF_INT_V8DF_QI:
37467     case V16SF_FTYPE_V16SF_INT_V16SF_HI:
37468     case V16HI_FTYPE_V16SF_INT_V16HI_HI:
37469     case V16SI_FTYPE_V16SI_INT_V16SI_HI:
37470     case V4SI_FTYPE_V16SI_INT_V4SI_QI:
37471     case V4DI_FTYPE_V8DI_INT_V4DI_QI:
37472     case V4DF_FTYPE_V8DF_INT_V4DF_QI:
37473     case V4SF_FTYPE_V16SF_INT_V4SF_QI:
37474     case V8DI_FTYPE_V8DI_INT_V8DI_QI:
37475       nargs = 4;
37476       mask_pos = 2;
37477       nargs_constant = 1;
37478       break;
37479     case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
37480     case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
37481     case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
37482     case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
37483     case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
37484     case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
37485     case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
37486     case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
37487     case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
37488     case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
37489     case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
37490     case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
37491     case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
37492     case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
37493     case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
37494     case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
37495     case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
37496     case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
37497     case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
37498     case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
37499     case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
37500     case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
37501     case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
37502     case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
37503     case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
37504     case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
37505     case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
37506       nargs = 5;
37507       mask_pos = 2;
37508       nargs_constant = 1;
37509       break;
37510     case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37511     case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37512     case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37513     case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37514     case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37515     case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37516     case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37517     case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37518     case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37519     case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37520     case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37521       nargs = 5;
37523       mask_pos = 1;
37524       nargs_constant = 1;
37525       break;
37526 
37527     default:
37528       gcc_unreachable ();
37529     }
37530 
37531   gcc_assert (nargs <= ARRAY_SIZE (args));
37532 
37533   if (comparison != UNKNOWN)
37534     {
37535       gcc_assert (nargs == 2);
37536       return ix86_expand_sse_compare (d, exp, target, swap);
37537     }
37538 
37539   if (rmode == VOIDmode || rmode == tmode)
37540     {
37541       if (optimize
37542 	  || target == 0
37543 	  || GET_MODE (target) != tmode
37544 	  || !insn_p->operand[0].predicate (target, tmode))
37545 	target = gen_reg_rtx (tmode);
37546       real_target = target;
37547     }
37548   else
37549     {
37550       real_target = gen_reg_rtx (tmode);
37551       target = simplify_gen_subreg (rmode, real_target, tmode, 0);
37552     }
37553 
37554   for (i = 0; i < nargs; i++)
37555     {
37556       tree arg = CALL_EXPR_ARG (exp, i);
37557       rtx op = expand_normal (arg);
37558       machine_mode mode = insn_p->operand[i + 1].mode;
37559       bool match = insn_p->operand[i + 1].predicate (op, mode);
37560 
37561       if (last_arg_count && (i + 1) == nargs)
37562 	{
37563 	  /* SIMD shift insns take either an 8-bit immediate or a
37564 	     register as the count.  But the builtin functions take an
37565 	     int.  If the count doesn't match, we put it in a register.  */
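	  /* (E.g. __builtin_ia32_psllwi128 behind _mm_slli_epi16 is a
	     V8HI_FTYPE_V8HI_SI_COUNT case; a non-constant count reaches
	     this point and is copied into a register.)  */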
37566 	  if (!match)
37567 	    {
37568 	      op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37569 	      if (!insn_p->operand[i + 1].predicate (op, mode))
37570 		op = copy_to_reg (op);
37571 	    }
37572 	}
37573       else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
37574 	       || (!mask_pos && (nargs - i) <= nargs_constant))
37575 	{
37576 	  if (!match)
37577 	    switch (icode)
37578 	      {
37579 	      case CODE_FOR_avx_vinsertf128v4di:
37580 	      case CODE_FOR_avx_vextractf128v4di:
37581 		error ("the last argument must be a 1-bit immediate");
37582 		return const0_rtx;
37583 
37584 	      case CODE_FOR_avx512f_cmpv8di3_mask:
37585 	      case CODE_FOR_avx512f_cmpv16si3_mask:
37586 	      case CODE_FOR_avx512f_ucmpv8di3_mask:
37587 	      case CODE_FOR_avx512f_ucmpv16si3_mask:
37588 	      case CODE_FOR_avx512vl_cmpv4di3_mask:
37589 	      case CODE_FOR_avx512vl_cmpv8si3_mask:
37590 	      case CODE_FOR_avx512vl_ucmpv4di3_mask:
37591 	      case CODE_FOR_avx512vl_ucmpv8si3_mask:
37592 	      case CODE_FOR_avx512vl_cmpv2di3_mask:
37593 	      case CODE_FOR_avx512vl_cmpv4si3_mask:
37594 	      case CODE_FOR_avx512vl_ucmpv2di3_mask:
37595 	      case CODE_FOR_avx512vl_ucmpv4si3_mask:
37596 		error ("the last argument must be a 3-bit immediate");
37597 		return const0_rtx;
37598 
37599 	      case CODE_FOR_sse4_1_roundsd:
37600 	      case CODE_FOR_sse4_1_roundss:
37601 
37602 	      case CODE_FOR_sse4_1_roundpd:
37603 	      case CODE_FOR_sse4_1_roundps:
37604 	      case CODE_FOR_avx_roundpd256:
37605 	      case CODE_FOR_avx_roundps256:
37606 
37607 	      case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37608 	      case CODE_FOR_sse4_1_roundps_sfix:
37609 	      case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37610 	      case CODE_FOR_avx_roundps_sfix256:
37611 
37612 	      case CODE_FOR_sse4_1_blendps:
37613 	      case CODE_FOR_avx_blendpd256:
37614 	      case CODE_FOR_avx_vpermilv4df:
37615 	      case CODE_FOR_avx_vpermilv4df_mask:
37616 	      case CODE_FOR_avx512f_getmantv8df_mask:
37617 	      case CODE_FOR_avx512f_getmantv16sf_mask:
37618 	      case CODE_FOR_avx512vl_getmantv8sf_mask:
37619 	      case CODE_FOR_avx512vl_getmantv4df_mask:
37620 	      case CODE_FOR_avx512vl_getmantv4sf_mask:
37621 	      case CODE_FOR_avx512vl_getmantv2df_mask:
37622 	      case CODE_FOR_avx512dq_rangepv8df_mask_round:
37623 	      case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37624 	      case CODE_FOR_avx512dq_rangepv4df_mask:
37625 	      case CODE_FOR_avx512dq_rangepv8sf_mask:
37626 	      case CODE_FOR_avx512dq_rangepv2df_mask:
37627 	      case CODE_FOR_avx512dq_rangepv4sf_mask:
37628 	      case CODE_FOR_avx_shufpd256_mask:
37629 		error ("the last argument must be a 4-bit immediate");
37630 		return const0_rtx;
37631 
37632 	      case CODE_FOR_sha1rnds4:
37633 	      case CODE_FOR_sse4_1_blendpd:
37634 	      case CODE_FOR_avx_vpermilv2df:
37635 	      case CODE_FOR_avx_vpermilv2df_mask:
37636 	      case CODE_FOR_xop_vpermil2v2df3:
37637 	      case CODE_FOR_xop_vpermil2v4sf3:
37638 	      case CODE_FOR_xop_vpermil2v4df3:
37639 	      case CODE_FOR_xop_vpermil2v8sf3:
37640 	      case CODE_FOR_avx512f_vinsertf32x4_mask:
37641 	      case CODE_FOR_avx512f_vinserti32x4_mask:
37642 	      case CODE_FOR_avx512f_vextractf32x4_mask:
37643 	      case CODE_FOR_avx512f_vextracti32x4_mask:
37644 	      case CODE_FOR_sse2_shufpd:
37645 	      case CODE_FOR_sse2_shufpd_mask:
37646 	      case CODE_FOR_avx512dq_shuf_f64x2_mask:
37647 	      case CODE_FOR_avx512dq_shuf_i64x2_mask:
37648 	      case CODE_FOR_avx512vl_shuf_i32x4_mask:
37649 	      case CODE_FOR_avx512vl_shuf_f32x4_mask:
37650 		error ("the last argument must be a 2-bit immediate");
37651 		return const0_rtx;
37652 
37653 	      case CODE_FOR_avx_vextractf128v4df:
37654 	      case CODE_FOR_avx_vextractf128v8sf:
37655 	      case CODE_FOR_avx_vextractf128v8si:
37656 	      case CODE_FOR_avx_vinsertf128v4df:
37657 	      case CODE_FOR_avx_vinsertf128v8sf:
37658 	      case CODE_FOR_avx_vinsertf128v8si:
37659 	      case CODE_FOR_avx512f_vinsertf64x4_mask:
37660 	      case CODE_FOR_avx512f_vinserti64x4_mask:
37661 	      case CODE_FOR_avx512f_vextractf64x4_mask:
37662 	      case CODE_FOR_avx512f_vextracti64x4_mask:
37663 	      case CODE_FOR_avx512dq_vinsertf32x8_mask:
37664 	      case CODE_FOR_avx512dq_vinserti32x8_mask:
37665 	      case CODE_FOR_avx512vl_vinsertv4df:
37666 	      case CODE_FOR_avx512vl_vinsertv4di:
37667 	      case CODE_FOR_avx512vl_vinsertv8sf:
37668 	      case CODE_FOR_avx512vl_vinsertv8si:
37669 		error ("the last argument must be a 1-bit immediate");
37670 		return const0_rtx;
37671 
37672 	      case CODE_FOR_avx_vmcmpv2df3:
37673 	      case CODE_FOR_avx_vmcmpv4sf3:
37674 	      case CODE_FOR_avx_cmpv2df3:
37675 	      case CODE_FOR_avx_cmpv4sf3:
37676 	      case CODE_FOR_avx_cmpv4df3:
37677 	      case CODE_FOR_avx_cmpv8sf3:
37678 	      case CODE_FOR_avx512f_cmpv8df3_mask:
37679 	      case CODE_FOR_avx512f_cmpv16sf3_mask:
37680 	      case CODE_FOR_avx512f_vmcmpv2df3_mask:
37681 	      case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37682 		error ("the last argument must be a 5-bit immediate");
37683 		return const0_rtx;
37684 
37685 	      default:
37686 		switch (nargs_constant)
37687 		  {
37688 		  case 2:
37689 		    if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
37690 			|| (!mask_pos && (nargs - i) == nargs_constant))
37691 		      {
37692 			error ("the next to last argument must be an 8-bit immediate");
37693 			break;
37694 		      }
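		  /* FALLTHRU */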
37695 		  case 1:
37696 		    error ("the last argument must be an 8-bit immediate");
37697 		    break;
37698 		  default:
37699 		    gcc_unreachable ();
37700 		  }
37701 		return const0_rtx;
37702 	      }
37703 	}
37704       else
37705 	{
37706 	  if (VECTOR_MODE_P (mode))
37707 	    op = safe_vector_operand (op, mode);
37708 
37709 	  /* If we aren't optimizing, only allow one memory operand to
37710 	     be generated.  */
37711 	  if (memory_operand (op, mode))
37712 	    num_memory++;
37713 
37714 	  op = fixup_modeless_constant (op, mode);
37715 
37716 	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37717 	    {
37718 	      if (optimize || !match || num_memory > 1)
37719 		op = copy_to_mode_reg (mode, op);
37720 	    }
37721 	  else
37722 	    {
37723 	      op = copy_to_reg (op);
37724 	      op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37725 	    }
37726 	}
37727 
37728       args[i].op = op;
37729       args[i].mode = mode;
37730     }
37731 
37732   switch (nargs)
37733     {
37734     case 1:
37735       pat = GEN_FCN (icode) (real_target, args[0].op);
37736       break;
37737     case 2:
37738       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37739       break;
37740     case 3:
37741       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37742 			     args[2].op);
37743       break;
37744     case 4:
37745       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37746 			     args[2].op, args[3].op);
37747       break;
37748     case 5:
37749       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37750 			     args[2].op, args[3].op, args[4].op);
      break;
37751     case 6:
37752       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37753 			     args[2].op, args[3].op, args[4].op,
37754 			     args[5].op);
37755       break;
37756     default:
37757       gcc_unreachable ();
37758     }
37759 
37760   if (! pat)
37761     return 0;
37762 
37763   emit_insn (pat);
37764   return target;
37765 }
37766 
37767 /* Transform a pattern of the following layout:
37768      (parallel [
37769        (set (A B))
37770        (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
37771      ])
37772    into:
37773      (set (A B))
37774 
37775    Or:
37776      (parallel [ A B
37777      ...
37778      (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
37779      ...
37780      ])
37781    into:
37782      (parallel [ A B ... ])  */
37783 
37784 static rtx
37785 ix86_erase_embedded_rounding (rtx pat)
37786 {
37787   if (GET_CODE (pat) == INSN)
37788     pat = PATTERN (pat);
37789 
37790   gcc_assert (GET_CODE (pat) == PARALLEL);
37791 
37792   if (XVECLEN (pat, 0) == 2)
37793     {
37794       rtx p0 = XVECEXP (pat, 0, 0);
37795       rtx p1 = XVECEXP (pat, 0, 1);
37796 
37797       gcc_assert (GET_CODE (p0) == SET
37798 		  && GET_CODE (p1) == UNSPEC
37799 		  && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
37800 
37801       return p0;
37802     }
37803   else
37804     {
37805       rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37806       int i = 0;
37807       int j = 0;
37808 
37809       for (; i < XVECLEN (pat, 0); ++i)
37810 	{
37811 	  rtx elem = XVECEXP (pat, 0, i);
37812 	  if (GET_CODE (elem) != UNSPEC
37813 	      || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37814 	    res [j++] = elem;
37815 	}
37816 
37817       /* No more than one occurrence was removed.  */
37818       gcc_assert (j >= XVECLEN (pat, 0) - 1);
37819 
37820       return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37821     }
37822 }
37823 
37824 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37825    with rounding.  */
37826 static rtx
37827 ix86_expand_sse_comi_round (const struct builtin_description *d,
37828 			    tree exp, rtx target)
37829 {
37830   rtx pat, set_dst;
37831   tree arg0 = CALL_EXPR_ARG (exp, 0);
37832   tree arg1 = CALL_EXPR_ARG (exp, 1);
37833   tree arg2 = CALL_EXPR_ARG (exp, 2);
37834   tree arg3 = CALL_EXPR_ARG (exp, 3);
37835   rtx op0 = expand_normal (arg0);
37836   rtx op1 = expand_normal (arg1);
37837   rtx op2 = expand_normal (arg2);
37838   rtx op3 = expand_normal (arg3);
37839   enum insn_code icode = d->icode;
37840   const struct insn_data_d *insn_p = &insn_data[icode];
37841   machine_mode mode0 = insn_p->operand[0].mode;
37842   machine_mode mode1 = insn_p->operand[1].mode;
37843   enum rtx_code comparison = UNEQ;
37844   bool need_ucomi = false;
37845 
37846   /* See avxintrin.h for values.  */
37847   enum rtx_code comi_comparisons[32] =
37848     {
37849       UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37850       UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37851       UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37852     };
37853   bool need_ucomi_values[32] =
37854     {
37855       true,  false, false, true,  true,  false, false, true,
37856       true,  false, false, true,  true,  false, false, true,
37857       false, true,  true,  false, false, true,  true,  false,
37858       false, true,  true,  false, false, true,  true,  false
37859     };
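  /* For example, index 0 is _CMP_EQ_OQ (see avxintrin.h): an ordered,
     non-signaling predicate, so it maps to UNEQ and selects the quiet
     UCOMI form; index 16 is _CMP_EQ_OS, its signaling counterpart,
     which keeps the COMI form.  */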
37860 
37861   if (!CONST_INT_P (op2))
37862     {
37863       error ("the third argument must be a comparison constant");
37864       return const0_rtx;
37865     }
37866   if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37867     {
37868       error ("incorrect comparison mode");
37869       return const0_rtx;
37870     }
37871 
37872   if (!insn_p->operand[2].predicate (op3, SImode))
37873     {
37874       error ("incorrect rounding operand");
37875       return const0_rtx;
37876     }
37877 
37878   comparison = comi_comparisons[INTVAL (op2)];
37879   need_ucomi = need_ucomi_values[INTVAL (op2)];
37880 
37881   if (VECTOR_MODE_P (mode0))
37882     op0 = safe_vector_operand (op0, mode0);
37883   if (VECTOR_MODE_P (mode1))
37884     op1 = safe_vector_operand (op1, mode1);
37885 
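  /* Build the result as a zeroed SImode register whose low byte is
     written via STRICT_LOW_PART below; the upper 24 bits therefore
     stay zero.  */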
37886   target = gen_reg_rtx (SImode);
37887   emit_move_insn (target, const0_rtx);
37888   target = gen_rtx_SUBREG (QImode, target, 0);
37889 
37890   if ((optimize && !register_operand (op0, mode0))
37891       || !insn_p->operand[0].predicate (op0, mode0))
37892     op0 = copy_to_mode_reg (mode0, op0);
37893   if ((optimize && !register_operand (op1, mode1))
37894       || !insn_p->operand[1].predicate (op1, mode1))
37895     op1 = copy_to_mode_reg (mode1, op1);
37896 
37897   if (need_ucomi)
37898     icode = icode == CODE_FOR_sse_comi_round
37899 		     ? CODE_FOR_sse_ucomi_round
37900 		     : CODE_FOR_sse2_ucomi_round;
37901 
37902   pat = GEN_FCN (icode) (op0, op1, op3);
37903   if (! pat)
37904     return 0;
37905 
37906   /* The rounding operand can be either NO_ROUND or ROUND_SAE at this point.  */
37907   if (INTVAL (op3) == NO_ROUND)
37908     {
37909       pat = ix86_erase_embedded_rounding (pat);
37910       if (! pat)
37911 	return 0;
37912 
37913       set_dst = SET_DEST (pat);
37914     }
37915   else
37916     {
37917       gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37918       set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37919     }
37920 
37921   emit_insn (pat);
37922   emit_insn (gen_rtx_SET (VOIDmode,
37923 			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37924 			  gen_rtx_fmt_ee (comparison, QImode,
37925 					  set_dst,
37926 					  const0_rtx)));
37927 
37928   return SUBREG_REG (target);
37929 }
37930 
37931 static rtx
37932 ix86_expand_round_builtin (const struct builtin_description *d,
37933 			   tree exp, rtx target)
37934 {
37935   rtx pat;
37936   unsigned int i, nargs;
37937   struct
37938     {
37939       rtx op;
37940       machine_mode mode;
37941     } args[6];
37942   enum insn_code icode = d->icode;
37943   const struct insn_data_d *insn_p = &insn_data[icode];
37944   machine_mode tmode = insn_p->operand[0].mode;
37945   unsigned int nargs_constant = 0;
37946   unsigned int redundant_embed_rnd = 0;
37947 
37948   switch ((enum ix86_builtin_func_type) d->flag)
37949     {
37950     case UINT64_FTYPE_V2DF_INT:
37951     case UINT64_FTYPE_V4SF_INT:
37952     case UINT_FTYPE_V2DF_INT:
37953     case UINT_FTYPE_V4SF_INT:
37954     case INT64_FTYPE_V2DF_INT:
37955     case INT64_FTYPE_V4SF_INT:
37956     case INT_FTYPE_V2DF_INT:
37957     case INT_FTYPE_V4SF_INT:
37958       nargs = 2;
37959       break;
37960     case V4SF_FTYPE_V4SF_UINT_INT:
37961     case V4SF_FTYPE_V4SF_UINT64_INT:
37962     case V2DF_FTYPE_V2DF_UINT64_INT:
37963     case V4SF_FTYPE_V4SF_INT_INT:
37964     case V4SF_FTYPE_V4SF_INT64_INT:
37965     case V2DF_FTYPE_V2DF_INT64_INT:
37966     case V4SF_FTYPE_V4SF_V4SF_INT:
37967     case V2DF_FTYPE_V2DF_V2DF_INT:
37968     case V4SF_FTYPE_V4SF_V2DF_INT:
37969     case V2DF_FTYPE_V2DF_V4SF_INT:
37970       nargs = 3;
37971       break;
37972     case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37973     case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37974     case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37975     case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37976     case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37977     case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37978     case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37979     case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37980     case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37981     case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37982     case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37983     case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37984     case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37985     case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37986       nargs = 4;
37987       break;
37988     case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37989     case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37990       nargs_constant = 2;
37991       nargs = 4;
37992       break;
37993     case INT_FTYPE_V4SF_V4SF_INT_INT:
37994     case INT_FTYPE_V2DF_V2DF_INT_INT:
37995       return ix86_expand_sse_comi_round (d, exp, target);
37996     case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37997     case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37998     case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37999     case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
38000     case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
38001     case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
38002       nargs = 5;
38003       break;
38004     case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
38005     case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
38006       nargs_constant = 4;
38007       nargs = 5;
38008       break;
38009     case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
38010     case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
38011     case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
38012     case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
38013       nargs_constant = 3;
38014       nargs = 5;
38015       break;
38016     case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
38017     case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
38018     case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
38019     case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
38020       nargs = 6;
38021       nargs_constant = 4;
38022       break;
38023     case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
38024     case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
38025     case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
38026     case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
38027       nargs = 6;
38028       nargs_constant = 3;
38029       break;
38030     default:
38031       gcc_unreachable ();
38032     }
38033   gcc_assert (nargs <= ARRAY_SIZE (args));
38034 
38035   if (optimize
38036       || target == 0
38037       || GET_MODE (target) != tmode
38038       || !insn_p->operand[0].predicate (target, tmode))
38039     target = gen_reg_rtx (tmode);
38040 
38041   for (i = 0; i < nargs; i++)
38042     {
38043       tree arg = CALL_EXPR_ARG (exp, i);
38044       rtx op = expand_normal (arg);
38045       machine_mode mode = insn_p->operand[i + 1].mode;
38046       bool match = insn_p->operand[i + 1].predicate (op, mode);
38047 
38048       if (i == nargs - nargs_constant)
38049 	{
38050 	  if (!match)
38051 	    {
38052 	      switch (icode)
38053 		{
38054 		case CODE_FOR_avx512f_getmantv8df_mask_round:
38055 		case CODE_FOR_avx512f_getmantv16sf_mask_round:
38056 		case CODE_FOR_avx512f_vgetmantv2df_round:
38057 		case CODE_FOR_avx512f_vgetmantv4sf_round:
38058 		  error ("the immediate argument must be a 4-bit immediate");
38059 		  return const0_rtx;
38060 		case CODE_FOR_avx512f_cmpv8df3_mask_round:
38061 		case CODE_FOR_avx512f_cmpv16sf3_mask_round:
38062 		case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
38063 		case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
38064 		  error ("the immediate argument must be a 5-bit immediate");
38065 		  return const0_rtx;
38066 		default:
38067 		  error ("the immediate argument must be an 8-bit immediate");
38068 		  return const0_rtx;
38069 		}
38070 	    }
38071 	}
38072       else if (i == nargs - 1)
38073 	{
38074 	  if (!insn_p->operand[nargs].predicate (op, SImode))
38075 	    {
38076 	      error ("incorrect rounding operand");
38077 	      return const0_rtx;
38078 	    }
38079 
38080 	  /* If there is no rounding, use the normal version of the pattern.  */
38081 	  if (INTVAL (op) == NO_ROUND)
38082 	    redundant_embed_rnd = 1;
38083 	}
38084       else
38085 	{
38086 	  if (VECTOR_MODE_P (mode))
38087 	    op = safe_vector_operand (op, mode);
38088 
38089 	  op = fixup_modeless_constant (op, mode);
38090 
38091 	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38092 	    {
38093 	      if (optimize || !match)
38094 		op = copy_to_mode_reg (mode, op);
38095 	    }
38096 	  else
38097 	    {
38098 	      op = copy_to_reg (op);
38099 	      op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38100 	    }
38101 	}
38102 
38103       args[i].op = op;
38104       args[i].mode = mode;
38105     }
38106 
38107   switch (nargs)
38108     {
38109     case 1:
38110       pat = GEN_FCN (icode) (target, args[0].op);
38111       break;
38112     case 2:
38113       pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38114       break;
38115     case 3:
38116       pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38117 			     args[2].op);
38118       break;
38119     case 4:
38120       pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38121 			     args[2].op, args[3].op);
38122       break;
38123     case 5:
38124       pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38125 			     args[2].op, args[3].op, args[4].op);
      break;
38126     case 6:
38127       pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38128 			     args[2].op, args[3].op, args[4].op,
38129 			     args[5].op);
38130       break;
38131     default:
38132       gcc_unreachable ();
38133     }
38134 
38135   if (!pat)
38136     return 0;
38137 
38138   if (redundant_embed_rnd)
38139     pat = ix86_erase_embedded_rounding (pat);
38140 
38141   emit_insn (pat);
38142   return target;
38143 }
38144 
38145 /* Subroutine of ix86_expand_builtin to take care of special insns
38146    with variable number of operands.  */
38147 
38148 static rtx
38149 ix86_expand_special_args_builtin (const struct builtin_description *d,
38150 				  tree exp, rtx target)
38151 {
38152   tree arg;
38153   rtx pat, op;
38154   unsigned int i, nargs, arg_adjust, memory;
38155   bool aligned_mem = false;
38156   struct
38157     {
38158       rtx op;
38159       machine_mode mode;
38160     } args[3];
38161   enum insn_code icode = d->icode;
38162   bool last_arg_constant = false;
38163   const struct insn_data_d *insn_p = &insn_data[icode];
38164   machine_mode tmode = insn_p->operand[0].mode;
38165   enum { load, store } klass;
38166 
38167   switch ((enum ix86_builtin_func_type) d->flag)
38168     {
38169     case VOID_FTYPE_VOID:
38170       emit_insn (GEN_FCN (icode) (target));
38171       return 0;
38172     case VOID_FTYPE_UINT64:
38173     case VOID_FTYPE_UNSIGNED:
38174       nargs = 0;
38175       klass = store;
38176       memory = 0;
38177       break;
38178 
38179     case INT_FTYPE_VOID:
38180     case USHORT_FTYPE_VOID:
38181     case UINT64_FTYPE_VOID:
38182     case UNSIGNED_FTYPE_VOID:
38183       nargs = 0;
38184       klass = load;
38185       memory = 0;
38186       break;
38187     case UINT64_FTYPE_PUNSIGNED:
38188     case V2DI_FTYPE_PV2DI:
38189     case V4DI_FTYPE_PV4DI:
38190     case V32QI_FTYPE_PCCHAR:
38191     case V16QI_FTYPE_PCCHAR:
38192     case V8SF_FTYPE_PCV4SF:
38193     case V8SF_FTYPE_PCFLOAT:
38194     case V4SF_FTYPE_PCFLOAT:
38195     case V4DF_FTYPE_PCV2DF:
38196     case V4DF_FTYPE_PCDOUBLE:
38197     case V2DF_FTYPE_PCDOUBLE:
38198     case VOID_FTYPE_PVOID:
38199     case V16SI_FTYPE_PV4SI:
38200     case V16SF_FTYPE_PV4SF:
38201     case V8DI_FTYPE_PV4DI:
38202     case V8DI_FTYPE_PV8DI:
38203     case V8DF_FTYPE_PV4DF:
38204       nargs = 1;
38205       klass = load;
38206       memory = 0;
38207       switch (icode)
38208 	{
38209 	case CODE_FOR_sse4_1_movntdqa:
38210 	case CODE_FOR_avx2_movntdqa:
38211 	case CODE_FOR_avx512f_movntdqa:
38212 	  aligned_mem = true;
38213 	  break;
38214 	default:
38215 	  break;
38216 	}
38217       break;
38218     case VOID_FTYPE_PV2SF_V4SF:
38219     case VOID_FTYPE_PV8DI_V8DI:
38220     case VOID_FTYPE_PV4DI_V4DI:
38221     case VOID_FTYPE_PV2DI_V2DI:
38222     case VOID_FTYPE_PCHAR_V32QI:
38223     case VOID_FTYPE_PCHAR_V16QI:
38224     case VOID_FTYPE_PFLOAT_V16SF:
38225     case VOID_FTYPE_PFLOAT_V8SF:
38226     case VOID_FTYPE_PFLOAT_V4SF:
38227     case VOID_FTYPE_PDOUBLE_V8DF:
38228     case VOID_FTYPE_PDOUBLE_V4DF:
38229     case VOID_FTYPE_PDOUBLE_V2DF:
38230     case VOID_FTYPE_PLONGLONG_LONGLONG:
38231     case VOID_FTYPE_PULONGLONG_ULONGLONG:
38232     case VOID_FTYPE_PINT_INT:
38233       nargs = 1;
38234       klass = store;
38235       /* Reserve memory operand for target.  */
38236       memory = ARRAY_SIZE (args);
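      /* (Setting MEMORY past the last argument index means no argument
	 is treated as the memory operand; the store target itself is.)  */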
38237       switch (icode)
38238 	{
38239 	/* These builtins and instructions require the memory
38240 	   to be properly aligned.  */
38241 	case CODE_FOR_avx_movntv4di:
38242 	case CODE_FOR_sse2_movntv2di:
38243 	case CODE_FOR_avx_movntv8sf:
38244 	case CODE_FOR_sse_movntv4sf:
38245 	case CODE_FOR_sse4a_vmmovntv4sf:
38246 	case CODE_FOR_avx_movntv4df:
38247 	case CODE_FOR_sse2_movntv2df:
38248 	case CODE_FOR_sse4a_vmmovntv2df:
38249 	case CODE_FOR_sse2_movntidi:
38250 	case CODE_FOR_sse_movntq:
38251 	case CODE_FOR_sse2_movntisi:
38252 	case CODE_FOR_avx512f_movntv16sf:
38253 	case CODE_FOR_avx512f_movntv8df:
38254 	case CODE_FOR_avx512f_movntv8di:
38255 	  aligned_mem = true;
38256 	  break;
38257 	default:
38258 	  break;
38259 	}
38260       break;
38261     case V4SF_FTYPE_V4SF_PCV2SF:
38262     case V2DF_FTYPE_V2DF_PCDOUBLE:
38263       nargs = 2;
38264       klass = load;
38265       memory = 1;
38266       break;
38267     case V8SF_FTYPE_PCV8SF_V8SI:
38268     case V4DF_FTYPE_PCV4DF_V4DI:
38269     case V4SF_FTYPE_PCV4SF_V4SI:
38270     case V2DF_FTYPE_PCV2DF_V2DI:
38271     case V8SI_FTYPE_PCV8SI_V8SI:
38272     case V4DI_FTYPE_PCV4DI_V4DI:
38273     case V4SI_FTYPE_PCV4SI_V4SI:
38274     case V2DI_FTYPE_PCV2DI_V2DI:
38275       nargs = 2;
38276       klass = load;
38277       memory = 0;
38278       break;
38279     case VOID_FTYPE_PV8DF_V8DF_QI:
38280     case VOID_FTYPE_PV4DF_V4DF_QI:
38281     case VOID_FTYPE_PV2DF_V2DF_QI:
38282     case VOID_FTYPE_PV16SF_V16SF_HI:
38283     case VOID_FTYPE_PV8SF_V8SF_QI:
38284     case VOID_FTYPE_PV4SF_V4SF_QI:
38285     case VOID_FTYPE_PV8DI_V8DI_QI:
38286     case VOID_FTYPE_PV4DI_V4DI_QI:
38287     case VOID_FTYPE_PV2DI_V2DI_QI:
38288     case VOID_FTYPE_PV16SI_V16SI_HI:
38289     case VOID_FTYPE_PV8SI_V8SI_QI:
38290     case VOID_FTYPE_PV4SI_V4SI_QI:
38291       switch (icode)
38292 	{
38293 	/* These builtins and instructions require the memory
38294 	   to be properly aligned.  */
38295 	case CODE_FOR_avx512f_storev16sf_mask:
38296 	case CODE_FOR_avx512f_storev16si_mask:
38297 	case CODE_FOR_avx512f_storev8df_mask:
38298 	case CODE_FOR_avx512f_storev8di_mask:
38299 	case CODE_FOR_avx512vl_storev8sf_mask:
38300 	case CODE_FOR_avx512vl_storev8si_mask:
38301 	case CODE_FOR_avx512vl_storev4df_mask:
38302 	case CODE_FOR_avx512vl_storev4di_mask:
38303 	case CODE_FOR_avx512vl_storev4sf_mask:
38304 	case CODE_FOR_avx512vl_storev4si_mask:
38305 	case CODE_FOR_avx512vl_storev2df_mask:
38306 	case CODE_FOR_avx512vl_storev2di_mask:
38307 	  aligned_mem = true;
38308 	  break;
38309 	default:
38310 	  break;
38311 	}
38312       /* FALLTHRU */
38313     case VOID_FTYPE_PV8SF_V8SI_V8SF:
38314     case VOID_FTYPE_PV4DF_V4DI_V4DF:
38315     case VOID_FTYPE_PV4SF_V4SI_V4SF:
38316     case VOID_FTYPE_PV2DF_V2DI_V2DF:
38317     case VOID_FTYPE_PV8SI_V8SI_V8SI:
38318     case VOID_FTYPE_PV4DI_V4DI_V4DI:
38319     case VOID_FTYPE_PV4SI_V4SI_V4SI:
38320     case VOID_FTYPE_PV2DI_V2DI_V2DI:
38321     case VOID_FTYPE_PDOUBLE_V2DF_QI:
38322     case VOID_FTYPE_PFLOAT_V4SF_QI:
38323     case VOID_FTYPE_PV8SI_V8DI_QI:
38324     case VOID_FTYPE_PV8HI_V8DI_QI:
38325     case VOID_FTYPE_PV16HI_V16SI_HI:
38326     case VOID_FTYPE_PV16QI_V8DI_QI:
38327     case VOID_FTYPE_PV16QI_V16SI_HI:
38328     case VOID_FTYPE_PV4SI_V4DI_QI:
38329     case VOID_FTYPE_PV4SI_V2DI_QI:
38330     case VOID_FTYPE_PV8HI_V4DI_QI:
38331     case VOID_FTYPE_PV8HI_V2DI_QI:
38332     case VOID_FTYPE_PV8HI_V8SI_QI:
38333     case VOID_FTYPE_PV8HI_V4SI_QI:
38334     case VOID_FTYPE_PV16QI_V4DI_QI:
38335     case VOID_FTYPE_PV16QI_V2DI_QI:
38336     case VOID_FTYPE_PV16QI_V8SI_QI:
38337     case VOID_FTYPE_PV16QI_V4SI_QI:
38338     case VOID_FTYPE_PV8HI_V8HI_QI:
38339     case VOID_FTYPE_PV16HI_V16HI_HI:
38340     case VOID_FTYPE_PV32HI_V32HI_SI:
38341     case VOID_FTYPE_PV16QI_V16QI_HI:
38342     case VOID_FTYPE_PV32QI_V32QI_SI:
38343     case VOID_FTYPE_PV64QI_V64QI_DI:
38344       nargs = 2;
38345       klass = store;
38346       /* Reserve memory operand for target.  */
38347       memory = ARRAY_SIZE (args);
38348       break;
38349     case V4SF_FTYPE_PCV4SF_V4SF_QI:
38350     case V8SF_FTYPE_PCV8SF_V8SF_QI:
38351     case V16SF_FTYPE_PCV16SF_V16SF_HI:
38352     case V4SI_FTYPE_PCV4SI_V4SI_QI:
38353     case V8SI_FTYPE_PCV8SI_V8SI_QI:
38354     case V16SI_FTYPE_PCV16SI_V16SI_HI:
38355     case V2DF_FTYPE_PCV2DF_V2DF_QI:
38356     case V4DF_FTYPE_PCV4DF_V4DF_QI:
38357     case V8DF_FTYPE_PCV8DF_V8DF_QI:
38358     case V2DI_FTYPE_PCV2DI_V2DI_QI:
38359     case V4DI_FTYPE_PCV4DI_V4DI_QI:
38360     case V8DI_FTYPE_PCV8DI_V8DI_QI:
38361     case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
38362     case V4SF_FTYPE_PCFLOAT_V4SF_QI:
38363     case V8HI_FTYPE_PCV8HI_V8HI_QI:
38364     case V16HI_FTYPE_PCV16HI_V16HI_HI:
38365     case V32HI_FTYPE_PCV32HI_V32HI_SI:
38366     case V16QI_FTYPE_PCV16QI_V16QI_HI:
38367     case V32QI_FTYPE_PCV32QI_V32QI_SI:
38368     case V64QI_FTYPE_PCV64QI_V64QI_DI:
38369       nargs = 3;
38370       klass = load;
38371       memory = 0;
38372       switch (icode)
38373 	{
38374 	/* These builtins and instructions require the memory
38375 	   to be properly aligned.  */
38376 	case CODE_FOR_avx512f_loadv16sf_mask:
38377 	case CODE_FOR_avx512f_loadv16si_mask:
38378 	case CODE_FOR_avx512f_loadv8df_mask:
38379 	case CODE_FOR_avx512f_loadv8di_mask:
38380 	case CODE_FOR_avx512vl_loadv8sf_mask:
38381 	case CODE_FOR_avx512vl_loadv8si_mask:
38382 	case CODE_FOR_avx512vl_loadv4df_mask:
38383 	case CODE_FOR_avx512vl_loadv4di_mask:
38384 	case CODE_FOR_avx512vl_loadv4sf_mask:
38385 	case CODE_FOR_avx512vl_loadv4si_mask:
38386 	case CODE_FOR_avx512vl_loadv2df_mask:
38387 	case CODE_FOR_avx512vl_loadv2di_mask:
38388 	case CODE_FOR_avx512bw_loadv64qi_mask:
38389 	case CODE_FOR_avx512vl_loadv32qi_mask:
38390 	case CODE_FOR_avx512vl_loadv16qi_mask:
38391 	case CODE_FOR_avx512bw_loadv32hi_mask:
38392 	case CODE_FOR_avx512vl_loadv16hi_mask:
38393 	case CODE_FOR_avx512vl_loadv8hi_mask:
38394 	  aligned_mem = true;
38395 	  break;
38396 	default:
38397 	  break;
38398 	}
38399       break;
38400     case VOID_FTYPE_UINT_UINT_UINT:
38401     case VOID_FTYPE_UINT64_UINT_UINT:
38402     case UCHAR_FTYPE_UINT_UINT_UINT:
38403     case UCHAR_FTYPE_UINT64_UINT_UINT:
38404       nargs = 3;
38405       klass = load;
38406       memory = ARRAY_SIZE (args);
38407       last_arg_constant = true;
38408       break;
38409     default:
38410       gcc_unreachable ();
38411     }
38412 
38413   gcc_assert (nargs <= ARRAY_SIZE (args));
38414 
38415   if (klass == store)
38416     {
38417       arg = CALL_EXPR_ARG (exp, 0);
38418       op = expand_normal (arg);
38419       gcc_assert (target == 0);
38420       if (memory)
38421 	{
38422 	  op = ix86_zero_extend_to_Pmode (op);
38423 	  target = gen_rtx_MEM (tmode, op);
38424 	  /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38425 	     on it.  Try to improve it using get_pointer_alignment,
38426 	     and if the special builtin is one that requires strict
38427 	     mode alignment, also from its GET_MODE_ALIGNMENT.
38428 	     Failure to do so could lead to ix86_legitimate_combined_insn
38429 	     rejecting all changes to such insns.  */
38430 	  unsigned int align = get_pointer_alignment (arg);
38431 	  if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38432 	    align = GET_MODE_ALIGNMENT (tmode);
38433 	  if (MEM_ALIGN (target) < align)
38434 	    set_mem_align (target, align);
38435 	}
38436       else
38437 	target = force_reg (tmode, op);
38438       arg_adjust = 1;
38439     }
38440   else
38441     {
38442       arg_adjust = 0;
38443       if (optimize
38444 	  || target == 0
38445 	  || !register_operand (target, tmode)
38446 	  || GET_MODE (target) != tmode)
38447 	target = gen_reg_rtx (tmode);
38448     }
38449 
38450   for (i = 0; i < nargs; i++)
38451     {
38452       machine_mode mode = insn_p->operand[i + 1].mode;
38453       bool match;
38454 
38455       arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38456       op = expand_normal (arg);
38457       match = insn_p->operand[i + 1].predicate (op, mode);
38458 
38459       if (last_arg_constant && (i + 1) == nargs)
38460 	{
38461 	  if (!match)
38462 	    {
38463 	      if (icode == CODE_FOR_lwp_lwpvalsi3
38464 		  || icode == CODE_FOR_lwp_lwpinssi3
38465 		  || icode == CODE_FOR_lwp_lwpvaldi3
38466 		  || icode == CODE_FOR_lwp_lwpinsdi3)
38467 		error ("the last argument must be a 32-bit immediate");
38468 	      else
38469 		error ("the last argument must be an 8-bit immediate");
38470 	      return const0_rtx;
38471 	    }
38472 	}
38473       else
38474 	{
38475 	  if (i == memory)
38476 	    {
38477 	      /* This must be the memory operand.  */
38478 	      op = ix86_zero_extend_to_Pmode (op);
38479 	      op = gen_rtx_MEM (mode, op);
38480 	      /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38481 		 on it.  Try to improve it using get_pointer_alignment,
38482 		 and if the special builtin is one that requires strict
38483 		 mode alignment, also from its GET_MODE_ALIGNMENT.
38484 		 Failure to do so could lead to ix86_legitimate_combined_insn
38485 		 rejecting all changes to such insns.  */
38486 	      unsigned int align = get_pointer_alignment (arg);
38487 	      if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38488 		align = GET_MODE_ALIGNMENT (mode);
38489 	      if (MEM_ALIGN (op) < align)
38490 		set_mem_align (op, align);
38491 	    }
38492 	  else
38493 	    {
38494 	      /* This must be a register.  */
38495 	      if (VECTOR_MODE_P (mode))
38496 		op = safe_vector_operand (op, mode);
38497 
38498 	      op = fixup_modeless_constant (op, mode);
38499 
38500 	      if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38501 		op = copy_to_mode_reg (mode, op);
38502 	      else
38503 	        {
38504 	          op = copy_to_reg (op);
38505 	          op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38506 	        }
38507 	    }
38508 	}
38509 
38510       args[i].op = op;
38511       args[i].mode = mode;
38512     }
38513 
38514   switch (nargs)
38515     {
38516     case 0:
38517       pat = GEN_FCN (icode) (target);
38518       break;
38519     case 1:
38520       pat = GEN_FCN (icode) (target, args[0].op);
38521       break;
38522     case 2:
38523       pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38524       break;
38525     case 3:
38526       pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38527       break;
38528     default:
38529       gcc_unreachable ();
38530     }
38531 
38532   if (! pat)
38533     return 0;
38534   emit_insn (pat);
38535   return klass == store ? 0 : target;
38536 }
38537 
38538 /* Return the integer constant in ARG.  Constrain it to be in the range
38539    of the subparts of VEC_TYPE; issue an error if not.  */
38540 
38541 static int
38542 get_element_number (tree vec_type, tree arg)
38543 {
38544   unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
38545 
38546   if (!tree_fits_uhwi_p (arg)
38547       || (elt = tree_to_uhwi (arg), elt > max))
38548     {
38549       error ("selector must be an integer constant in the range 0..%wi", max);
38550       return 0;
38551     }
38552 
38553   return elt;
38554 }
38555 
38556 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
38557    ix86_expand_vector_init.  We DO have language-level syntax for this, in
38558    the form of  (type){ init-list }.  Except that since we can't place emms
38559    instructions from inside the compiler, we can't allow the use of MMX
38560    registers unless the user explicitly asks for it.  So we do *not* define
38561    vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
38562    we have builtins invoked by mmintrin.h that give us license to emit
38563    these sorts of instructions.  */
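
/* For instance, mmintrin.h implements _mm_set_pi32 on top of
   __builtin_ia32_vec_init_v2si, which is expanded right here.  */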
38564 
38565 static rtx
38566 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38567 {
38568   machine_mode tmode = TYPE_MODE (type);
38569   machine_mode inner_mode = GET_MODE_INNER (tmode);
38570   int i, n_elt = GET_MODE_NUNITS (tmode);
38571   rtvec v = rtvec_alloc (n_elt);
38572 
38573   gcc_assert (VECTOR_MODE_P (tmode));
38574   gcc_assert (call_expr_nargs (exp) == n_elt);
38575 
38576   for (i = 0; i < n_elt; ++i)
38577     {
38578       rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38579       RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38580     }
38581 
38582   if (!target || !register_operand (target, tmode))
38583     target = gen_reg_rtx (tmode);
38584 
38585   ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38586   return target;
38587 }
38588 
38589 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
38590    ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
38591    had a language-level syntax for referencing vector elements.  */
38592 
38593 static rtx
38594 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38595 {
38596   machine_mode tmode, mode0;
38597   tree arg0, arg1;
38598   int elt;
38599   rtx op0;
38600 
38601   arg0 = CALL_EXPR_ARG (exp, 0);
38602   arg1 = CALL_EXPR_ARG (exp, 1);
38603 
38604   op0 = expand_normal (arg0);
38605   elt = get_element_number (TREE_TYPE (arg0), arg1);
38606 
38607   tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38608   mode0 = TYPE_MODE (TREE_TYPE (arg0));
38609   gcc_assert (VECTOR_MODE_P (mode0));
38610 
38611   op0 = force_reg (mode0, op0);
38612 
38613   if (optimize || !target || !register_operand (target, tmode))
38614     target = gen_reg_rtx (tmode);
38615 
38616   ix86_expand_vector_extract (true, target, op0, elt);
38617 
38618   return target;
38619 }
38620 
38621 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
38622    ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
38623    a language-level syntax for referencing vector elements.  */
38624 
38625 static rtx
38626 ix86_expand_vec_set_builtin (tree exp)
38627 {
38628   machine_mode tmode, mode1;
38629   tree arg0, arg1, arg2;
38630   int elt;
38631   rtx op0, op1, target;
38632 
38633   arg0 = CALL_EXPR_ARG (exp, 0);
38634   arg1 = CALL_EXPR_ARG (exp, 1);
38635   arg2 = CALL_EXPR_ARG (exp, 2);
38636 
38637   tmode = TYPE_MODE (TREE_TYPE (arg0));
38638   mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38639   gcc_assert (VECTOR_MODE_P (tmode));
38640 
38641   op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38642   op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38643   elt = get_element_number (TREE_TYPE (arg0), arg2);
38644 
38645   if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38646     op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38647 
38648   op0 = force_reg (tmode, op0);
38649   op1 = force_reg (mode1, op1);
38650 
38651   /* OP0 is the source of these builtin functions and shouldn't be
38652      modified.  Create a copy, use it and return it as target.  */
38653   target = gen_reg_rtx (tmode);
38654   emit_move_insn (target, op0);
38655   ix86_expand_vector_set (true, target, op1, elt);
38656 
38657   return target;
38658 }
38659 
38660 /* Emit conditional move of SRC to DST with condition
38661    OP1 CODE OP2.  */
38662 static void
38663 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38664 {
38665   rtx t;
38666 
38667   if (TARGET_CMOVE)
38668     {
38669       t = ix86_expand_compare (code, op1, op2);
38670       emit_insn (gen_rtx_SET (VOIDmode, dst,
38671 			      gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
38672 						    src, dst)));
38673     }
38674   else
38675     {
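      /* No CMOV available: jump around the move when the reversed
	 condition holds, so the move executes only if CODE holds.  */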
38676       rtx nomove = gen_label_rtx ();
38677       emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38678 			       const0_rtx, GET_MODE (op1), 1, nomove);
38679       emit_move_insn (dst, src);
38680       emit_label (nomove);
38681     }
38682 }
38683 
38684 /* Choose the unsigned max of DST and SRC and store it in DST.  */
38685 static void
38686 ix86_emit_move_max (rtx dst, rtx src)
38687 {
38688   ix86_emit_cmove (dst, src, LTU, dst, src);
38689 }
38690 
38691 /* Expand an expression EXP that calls a built-in function,
38692    with result going to TARGET if that's convenient
38693    (and in mode MODE if that's convenient).
38694    SUBTARGET may be used as the target for computing one of EXP's operands.
38695    IGNORE is nonzero if the value is to be ignored.  */
38696 
38697 static rtx
38698 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38699 		     machine_mode mode, int ignore)
38700 {
38701   const struct builtin_description *d;
38702   size_t i;
38703   enum insn_code icode;
38704   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38705   tree arg0, arg1, arg2, arg3, arg4;
38706   rtx op0, op1, op2, op3, op4, pat, insn;
38707   machine_mode mode0, mode1, mode2, mode3, mode4;
38708   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38709 
38710   /* For CPU builtins that can be folded, fold first and expand the fold.  */
38711   switch (fcode)
38712     {
38713     case IX86_BUILTIN_CPU_INIT:
38714       {
38715 	/* Make it call __cpu_indicator_init in libgcc. */
38716 	tree call_expr, fndecl, type;
38717         type = build_function_type_list (integer_type_node, NULL_TREE);
38718 	fndecl = build_fn_decl ("__cpu_indicator_init", type);
38719 	call_expr = build_call_expr (fndecl, 0);
38720 	return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38721       }
38722     case IX86_BUILTIN_CPU_IS:
38723     case IX86_BUILTIN_CPU_SUPPORTS:
38724       {
38725 	tree arg0 = CALL_EXPR_ARG (exp, 0);
38726 	tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38727 	gcc_assert (fold_expr != NULL_TREE);
38728 	return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38729       }
38730     }
38731 
38732   /* Determine whether the builtin function is available under the current ISA.
38733      Originally the builtin was not created if it wasn't applicable to the
38734      current ISA based on the command line switches.  With function specific
38735      options, we need to check in the context of the function making the call
38736      whether it is supported.  */
38737   if (ix86_builtins_isa[fcode].isa
38738       && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38739     {
38740       char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38741 				       NULL, (enum fpmath_unit) 0, false);
38742 
38743       if (!opts)
38744 	error ("%qE needs unknown isa option", fndecl);
38745       else
38746 	{
38747 	  gcc_assert (opts != NULL);
38748 	  error ("%qE needs isa option %s", fndecl, opts);
38749 	  free (opts);
38750 	}
38751       return const0_rtx;
38752     }
38753 
38754   switch (fcode)
38755     {
38756     case IX86_BUILTIN_BNDMK:
38757       if (!target
38758 	  || GET_MODE (target) != BNDmode
38759 	  || !register_operand (target, BNDmode))
38760 	target = gen_reg_rtx (BNDmode);
38761 
38762       arg0 = CALL_EXPR_ARG (exp, 0);
38763       arg1 = CALL_EXPR_ARG (exp, 1);
38764 
38765       op0 = expand_normal (arg0);
38766       op1 = expand_normal (arg1);
38767 
38768       if (!register_operand (op0, Pmode))
38769 	op0 = ix86_zero_extend_to_Pmode (op0);
38770       if (!register_operand (op1, Pmode))
38771 	op1 = ix86_zero_extend_to_Pmode (op1);
38772 
38773       /* Builtin arg1 is the size of the block, but instruction op1
38774 	 should be (size - 1).  */
38775       op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38776 				 NULL_RTX, 1, OPTAB_DIRECT);
38777 
38778       emit_insn (BNDmode == BND64mode
38779                  ? gen_bnd64_mk (target, op0, op1)
38780                  : gen_bnd32_mk (target, op0, op1));
38781       return target;
38782 
38783     case IX86_BUILTIN_BNDSTX:
38784       arg0 = CALL_EXPR_ARG (exp, 0);
38785       arg1 = CALL_EXPR_ARG (exp, 1);
38786       arg2 = CALL_EXPR_ARG (exp, 2);
38787 
38788       op0 = expand_normal (arg0);
38789       op1 = expand_normal (arg1);
38790       op2 = expand_normal (arg2);
38791 
38792       if (!register_operand (op0, Pmode))
38793 	op0 = ix86_zero_extend_to_Pmode (op0);
38794       if (!register_operand (op1, BNDmode))
38795 	op1 = copy_to_mode_reg (BNDmode, op1);
38796       if (!register_operand (op2, Pmode))
38797 	op2 = ix86_zero_extend_to_Pmode (op2);
38798 
38799       emit_insn (BNDmode == BND64mode
38800                  ? gen_bnd64_stx (op2, op0, op1)
38801                  : gen_bnd32_stx (op2, op0, op1));
38802       return 0;
38803 
38804     case IX86_BUILTIN_BNDLDX:
38805       if (!target
38806 	  || GET_MODE (target) != BNDmode
38807 	  || !register_operand (target, BNDmode))
38808 	target = gen_reg_rtx (BNDmode);
38809 
38810       arg0 = CALL_EXPR_ARG (exp, 0);
38811       arg1 = CALL_EXPR_ARG (exp, 1);
38812 
38813       op0 = expand_normal (arg0);
38814       op1 = expand_normal (arg1);
38815 
38816       if (!register_operand (op0, Pmode))
38817 	op0 = ix86_zero_extend_to_Pmode (op0);
38818       if (!register_operand (op1, Pmode))
38819 	op1 = ix86_zero_extend_to_Pmode (op1);
38820 
38821       emit_insn (BNDmode == BND64mode
38822 		 ? gen_bnd64_ldx (target, op0, op1)
38823 		 : gen_bnd32_ldx (target, op0, op1));
38824       return target;
38825 
38826     case IX86_BUILTIN_BNDCL:
38827       arg0 = CALL_EXPR_ARG (exp, 0);
38828       arg1 = CALL_EXPR_ARG (exp, 1);
38829 
38830       op0 = expand_normal (arg0);
38831       op1 = expand_normal (arg1);
38832 
38833       if (!register_operand (op0, Pmode))
38834 	op0 = ix86_zero_extend_to_Pmode (op0);
38835       if (!register_operand (op1, BNDmode))
38836 	op1 = copy_to_mode_reg (BNDmode, op1);
38837 
38838       emit_insn (BNDmode == BND64mode
38839                  ? gen_bnd64_cl (op1, op0)
38840                  : gen_bnd32_cl (op1, op0));
38841       return 0;
38842 
38843     case IX86_BUILTIN_BNDCU:
38844       arg0 = CALL_EXPR_ARG (exp, 0);
38845       arg1 = CALL_EXPR_ARG (exp, 1);
38846 
38847       op0 = expand_normal (arg0);
38848       op1 = expand_normal (arg1);
38849 
38850       if (!register_operand (op0, Pmode))
38851 	op0 = ix86_zero_extend_to_Pmode (op0);
38852       if (!register_operand (op1, BNDmode))
38853 	op1 = copy_to_mode_reg (BNDmode, op1);
38854 
38855       emit_insn (BNDmode == BND64mode
38856                  ? gen_bnd64_cu (op1, op0)
38857                  : gen_bnd32_cu (op1, op0));
38858       return 0;
38859 
38860     case IX86_BUILTIN_BNDRET:
38861       arg0 = CALL_EXPR_ARG (exp, 0);
38862       gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38863       target = chkp_get_rtl_bounds (arg0);
38864 
38865       /* If no bounds were specified for the returned value, use
38866 	 INIT bounds ([0, ~0], the whole address space).  This usually
38867 	 happens when some built-in function is expanded.  */
38868       if (!target)
38869 	{
38870 	  rtx t1 = gen_reg_rtx (Pmode);
38871 	  rtx t2 = gen_reg_rtx (Pmode);
38872 	  target = gen_reg_rtx (BNDmode);
38873 	  emit_move_insn (t1, const0_rtx);
38874 	  emit_move_insn (t2, constm1_rtx);
38875 	  emit_insn (BNDmode == BND64mode
38876 		     ? gen_bnd64_mk (target, t1, t2)
38877 		     : gen_bnd32_mk (target, t1, t2));
38878 	}
38879 
38880       gcc_assert (target && REG_P (target));
38881       return target;
38882 
38883     case IX86_BUILTIN_BNDNARROW:
38884       {
38885 	rtx m1, m1h1, m1h2, lb, ub, t1;
38886 
38887 	/* Return value and lb.  */
38888 	arg0 = CALL_EXPR_ARG (exp, 0);
38889 	/* Bounds.  */
38890 	arg1 = CALL_EXPR_ARG (exp, 1);
38891 	/* Size.  */
38892 	arg2 = CALL_EXPR_ARG (exp, 2);
38893 
38894 	lb = expand_normal (arg0);
38895 	op1 = expand_normal (arg1);
38896 	op2 = expand_normal (arg2);
38897 
38898 	/* The size was passed, but we need to use (size - 1), as for bndmk.  */
38899 	op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38900 				   NULL_RTX, 1, OPTAB_DIRECT);
38901 
38902 	/* Add LB to the size and invert to get UB: UB = ~(LB + size - 1).  */
38903 	op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38904 				   op2, 1, OPTAB_DIRECT);
38905 	ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
38906 
38907 	if (!register_operand (lb, Pmode))
38908 	  lb = ix86_zero_extend_to_Pmode (lb);
38909 	if (!register_operand (ub, Pmode))
38910 	  ub = ix86_zero_extend_to_Pmode (ub);
38911 
38912 	/* We need to move bounds to memory before any computations.  */
38913 	if (MEM_P (op1))
38914 	  m1 = op1;
38915 	else
38916 	  {
38917 	    m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38918 	    emit_move_insn (m1, op1);
38919 	  }
38920 
38921 	/* Generate mem expressions to be used to access LB and UB.  */
38922 	m1h1 = adjust_address (m1, Pmode, 0);
38923 	m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38924 
38925 	t1 = gen_reg_rtx (Pmode);
38926 
38927 	/* Compute LB.  */
38928 	emit_move_insn (t1, m1h1);
38929 	ix86_emit_move_max (t1, lb);
38930 	emit_move_insn (m1h1, t1);
38931 
38932 	/* Compute UB.  UB is stored in 1's complement form.  Therefore
38933 	   we also use max here.  */
38934 	emit_move_insn (t1, m1h2);
38935 	ix86_emit_move_max (t1, ub);
38936 	emit_move_insn (m1h2, t1);
38937 
38938 	op2 = gen_reg_rtx (BNDmode);
38939 	emit_move_insn (op2, m1);
38940 
38941 	return chkp_join_splitted_slot (lb, op2);
38942       }
38943 
38944     case IX86_BUILTIN_BNDINT:
38945       {
38946 	rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38947 
38948 	if (!target
38949 	    || GET_MODE (target) != BNDmode
38950 	    || !register_operand (target, BNDmode))
38951 	  target = gen_reg_rtx (BNDmode);
38952 
38953 	arg0 = CALL_EXPR_ARG (exp, 0);
38954 	arg1 = CALL_EXPR_ARG (exp, 1);
38955 
38956 	op0 = expand_normal (arg0);
38957 	op1 = expand_normal (arg1);
38958 
38959 	res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38960 	rh1 = adjust_address (res, Pmode, 0);
38961 	rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38962 
38963 	/* Put the first bounds into temporaries.  */
38964 	lb1 = gen_reg_rtx (Pmode);
38965 	ub1 = gen_reg_rtx (Pmode);
38966 	if (MEM_P (op0))
38967 	  {
38968 	    emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38969 	    emit_move_insn (ub1, adjust_address (op0, Pmode,
38970 						 GET_MODE_SIZE (Pmode)));
38971 	  }
38972 	else
38973 	  {
38974 	    emit_move_insn (res, op0);
38975 	    emit_move_insn (lb1, rh1);
38976 	    emit_move_insn (ub1, rh2);
38977 	  }
38978 
38979 	/* Put the second bounds into temporaries.  */
38980 	lb2 = gen_reg_rtx (Pmode);
38981 	ub2 = gen_reg_rtx (Pmode);
38982 	if (MEM_P (op1))
38983 	  {
38984 	    emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38985 	    emit_move_insn (ub2, adjust_address (op1, Pmode,
38986 						 GET_MODE_SIZE (Pmode)));
38987 	  }
38988 	else
38989 	  {
38990 	    emit_move_insn (res, op1);
38991 	    emit_move_insn (lb2, rh1);
38992 	    emit_move_insn (ub2, rh2);
38993 	  }
38994 
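	/* Intersect the bounds: the result's LB is the max of the two
	   LBs, and since UBs are stored in one's complement form, the
	   max of the stored UBs corresponds to the min of the actual
	   UBs.  */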
38995 	/* Compute LB.  */
38996 	ix86_emit_move_max (lb1, lb2);
38997 	emit_move_insn (rh1, lb1);
38998 
38999 	/* Compute UB.  UB is stored in 1's complement form.  Therefore
39000 	   we also use max here.  */
39001 	ix86_emit_move_max (ub1, ub2);
39002 	emit_move_insn (rh2, ub1);
39003 
39004 	emit_move_insn (target, res);
39005 
39006 	return target;
39007       }
39008 
39009     case IX86_BUILTIN_SIZEOF:
39010       {
39011 	tree name;
39012 	rtx symbol;
39013 
39014 	if (!target
39015 	    || GET_MODE (target) != Pmode
39016 	    || !register_operand (target, Pmode))
39017 	  target = gen_reg_rtx (Pmode);
39018 
39019 	arg0 = CALL_EXPR_ARG (exp, 0);
39020 	gcc_assert (TREE_CODE (arg0) == VAR_DECL);
39021 
39022 	name = DECL_ASSEMBLER_NAME (arg0);
39023 	symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
39024 
39025 	emit_insn (Pmode == SImode
39026 		   ? gen_move_size_reloc_si (target, symbol)
39027 		   : gen_move_size_reloc_di (target, symbol));
39028 
39029 	return target;
39030       }
39031 
39032     case IX86_BUILTIN_BNDLOWER:
39033       {
39034 	rtx mem, hmem;
39035 
39036 	if (!target
39037 	    || GET_MODE (target) != Pmode
39038 	    || !register_operand (target, Pmode))
39039 	  target = gen_reg_rtx (Pmode);
39040 
39041 	arg0 = CALL_EXPR_ARG (exp, 0);
39042 	op0 = expand_normal (arg0);
39043 
39044 	/* We need to move bounds to memory first.  */
39045 	if (MEM_P (op0))
39046 	  mem = op0;
39047 	else
39048 	  {
39049 	    mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
39050 	    emit_move_insn (mem, op0);
39051 	  }
39052 
39053 	/* Generate mem expression to access LB and load it.  */
39054 	hmem = adjust_address (mem, Pmode, 0);
39055 	emit_move_insn (target, hmem);
39056 
39057 	return target;
39058       }
39059 
39060     case IX86_BUILTIN_BNDUPPER:
39061       {
39062 	rtx mem, hmem, res;
39063 
39064 	if (!target
39065 	    || GET_MODE (target) != Pmode
39066 	    || !register_operand (target, Pmode))
39067 	  target = gen_reg_rtx (Pmode);
39068 
39069 	arg0 = CALL_EXPR_ARG (exp, 0);
39070 	op0 = expand_normal (arg0);
39071 
39072 	/* We need to move bounds to memory first.  */
39073 	if (MEM_P (op0))
39074 	  mem = op0;
39075 	else
39076 	  {
39077 	    mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
39078 	    emit_move_insn (mem, op0);
39079 	  }
39080 
39081 	/* Generate mem expression to access UB.  */
39082 	hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
39083 
39084 	/* We need to invert all bits of UB.  */
39085 	res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
39086 
39087 	if (res != target)
39088 	  emit_move_insn (target, res);
39089 
39090 	return target;
39091       }
39092 
39093     case IX86_BUILTIN_MASKMOVQ:
39094     case IX86_BUILTIN_MASKMOVDQU:
39095       icode = (fcode == IX86_BUILTIN_MASKMOVQ
39096 	       ? CODE_FOR_mmx_maskmovq
39097 	       : CODE_FOR_sse2_maskmovdqu);
39098       /* Note the arg order is different from the operand order.  */
39099       arg1 = CALL_EXPR_ARG (exp, 0);
39100       arg2 = CALL_EXPR_ARG (exp, 1);
39101       arg0 = CALL_EXPR_ARG (exp, 2);
39102       op0 = expand_normal (arg0);
39103       op1 = expand_normal (arg1);
39104       op2 = expand_normal (arg2);
39105       mode0 = insn_data[icode].operand[0].mode;
39106       mode1 = insn_data[icode].operand[1].mode;
39107       mode2 = insn_data[icode].operand[2].mode;
39108 
39109       op0 = ix86_zero_extend_to_Pmode (op0);
39110       op0 = gen_rtx_MEM (mode1, op0);
39111 
39112       if (!insn_data[icode].operand[0].predicate (op0, mode0))
39113 	op0 = copy_to_mode_reg (mode0, op0);
39114       if (!insn_data[icode].operand[1].predicate (op1, mode1))
39115 	op1 = copy_to_mode_reg (mode1, op1);
39116       if (!insn_data[icode].operand[2].predicate (op2, mode2))
39117 	op2 = copy_to_mode_reg (mode2, op2);
39118       pat = GEN_FCN (icode) (op0, op1, op2);
39119       if (! pat)
39120 	return 0;
39121       emit_insn (pat);
39122       return 0;
39123 
39124     case IX86_BUILTIN_LDMXCSR:
39125       op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
39126       target = assign_386_stack_local (SImode, SLOT_TEMP);
39127       emit_move_insn (target, op0);
39128       emit_insn (gen_sse_ldmxcsr (target));
39129       return 0;
39130 
39131     case IX86_BUILTIN_STMXCSR:
39132       target = assign_386_stack_local (SImode, SLOT_TEMP);
39133       emit_insn (gen_sse_stmxcsr (target));
39134       return copy_to_mode_reg (SImode, target);
39135 
39136     case IX86_BUILTIN_CLFLUSH:
39137 	arg0 = CALL_EXPR_ARG (exp, 0);
39138 	op0 = expand_normal (arg0);
39139 	icode = CODE_FOR_sse2_clflush;
39140 	if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39141 	  op0 = ix86_zero_extend_to_Pmode (op0);
39142 
39143 	emit_insn (gen_sse2_clflush (op0));
39144 	return 0;
39145 
39146     case IX86_BUILTIN_CLWB:
39147 	arg0 = CALL_EXPR_ARG (exp, 0);
39148 	op0 = expand_normal (arg0);
39149 	icode = CODE_FOR_clwb;
39150 	if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39151 	  op0 = ix86_zero_extend_to_Pmode (op0);
39152 
39153 	emit_insn (gen_clwb (op0));
39154 	return 0;
39155 
39156     case IX86_BUILTIN_CLFLUSHOPT:
39157 	arg0 = CALL_EXPR_ARG (exp, 0);
39158 	op0 = expand_normal (arg0);
39159 	icode = CODE_FOR_clflushopt;
39160 	if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39161 	  op0 = ix86_zero_extend_to_Pmode (op0);
39162 
39163 	emit_insn (gen_clflushopt (op0));
39164 	return 0;
39165 
39166     case IX86_BUILTIN_MONITOR:
39167     case IX86_BUILTIN_MONITORX:
39168       arg0 = CALL_EXPR_ARG (exp, 0);
39169       arg1 = CALL_EXPR_ARG (exp, 1);
39170       arg2 = CALL_EXPR_ARG (exp, 2);
39171       op0 = expand_normal (arg0);
39172       op1 = expand_normal (arg1);
39173       op2 = expand_normal (arg2);
39174       if (!REG_P (op0))
39175 	op0 = ix86_zero_extend_to_Pmode (op0);
39176       if (!REG_P (op1))
39177 	op1 = copy_to_mode_reg (SImode, op1);
39178       if (!REG_P (op2))
39179 	op2 = copy_to_mode_reg (SImode, op2);
39180 
39181       emit_insn (fcode == IX86_BUILTIN_MONITOR
39182 		 ? ix86_gen_monitor (op0, op1, op2)
39183 		 : ix86_gen_monitorx (op0, op1, op2));
39184       return 0;
39185 
39186     case IX86_BUILTIN_MWAIT:
39187       arg0 = CALL_EXPR_ARG (exp, 0);
39188       arg1 = CALL_EXPR_ARG (exp, 1);
39189       op0 = expand_normal (arg0);
39190       op1 = expand_normal (arg1);
39191       if (!REG_P (op0))
39192 	op0 = copy_to_mode_reg (SImode, op0);
39193       if (!REG_P (op1))
39194 	op1 = copy_to_mode_reg (SImode, op1);
39195       emit_insn (gen_sse3_mwait (op0, op1));
39196       return 0;
39197 
39198     case IX86_BUILTIN_MWAITX:
39199       arg0 = CALL_EXPR_ARG (exp, 0);
39200       arg1 = CALL_EXPR_ARG (exp, 1);
39201       arg2 = CALL_EXPR_ARG (exp, 2);
39202       op0 = expand_normal (arg0);
39203       op1 = expand_normal (arg1);
39204       op2 = expand_normal (arg2);
39205       if (!REG_P (op0))
39206 	op0 = copy_to_mode_reg (SImode, op0);
39207       if (!REG_P (op1))
39208 	op1 = copy_to_mode_reg (SImode, op1);
39209       if (!REG_P (op2))
39210 	op2 = copy_to_mode_reg (SImode, op2);
39211       emit_insn (gen_mwaitx (op0, op1, op2));
39212       return 0;
39213 
39214     case IX86_BUILTIN_VEC_INIT_V2SI:
39215     case IX86_BUILTIN_VEC_INIT_V4HI:
39216     case IX86_BUILTIN_VEC_INIT_V8QI:
39217       return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
39218 
39219     case IX86_BUILTIN_VEC_EXT_V2DF:
39220     case IX86_BUILTIN_VEC_EXT_V2DI:
39221     case IX86_BUILTIN_VEC_EXT_V4SF:
39222     case IX86_BUILTIN_VEC_EXT_V4SI:
39223     case IX86_BUILTIN_VEC_EXT_V8HI:
39224     case IX86_BUILTIN_VEC_EXT_V2SI:
39225     case IX86_BUILTIN_VEC_EXT_V4HI:
39226     case IX86_BUILTIN_VEC_EXT_V16QI:
39227       return ix86_expand_vec_ext_builtin (exp, target);
39228 
39229     case IX86_BUILTIN_VEC_SET_V2DI:
39230     case IX86_BUILTIN_VEC_SET_V4SF:
39231     case IX86_BUILTIN_VEC_SET_V4SI:
39232     case IX86_BUILTIN_VEC_SET_V8HI:
39233     case IX86_BUILTIN_VEC_SET_V4HI:
39234     case IX86_BUILTIN_VEC_SET_V16QI:
39235       return ix86_expand_vec_set_builtin (exp);
39236 
39237     case IX86_BUILTIN_INFQ:
39238     case IX86_BUILTIN_HUGE_VALQ:
39239       {
39240 	REAL_VALUE_TYPE inf;
39241 	rtx tmp;
39242 
39243 	real_inf (&inf);
39244 	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
39245 
39246 	tmp = validize_mem (force_const_mem (mode, tmp));
39247 
39248 	if (target == 0)
39249 	  target = gen_reg_rtx (mode);
39250 
39251 	emit_move_insn (target, tmp);
39252 	return target;
39253       }
39254 
39255     case IX86_BUILTIN_RDPMC:
39256     case IX86_BUILTIN_RDTSC:
39257     case IX86_BUILTIN_RDTSCP:
39258 
39259       op0 = gen_reg_rtx (DImode);
39260       op1 = gen_reg_rtx (DImode);
39261 
39262       if (fcode == IX86_BUILTIN_RDPMC)
39263 	{
39264 	  arg0 = CALL_EXPR_ARG (exp, 0);
39265 	  op2 = expand_normal (arg0);
39266 	  if (!register_operand (op2, SImode))
39267 	    op2 = copy_to_mode_reg (SImode, op2);
39268 
39269 	  insn = (TARGET_64BIT
39270 		  ? gen_rdpmc_rex64 (op0, op1, op2)
39271 		  : gen_rdpmc (op0, op2));
39272 	  emit_insn (insn);
39273 	}
39274       else if (fcode == IX86_BUILTIN_RDTSC)
39275 	{
39276 	  insn = (TARGET_64BIT
39277 		  ? gen_rdtsc_rex64 (op0, op1)
39278 		  : gen_rdtsc (op0));
39279 	  emit_insn (insn);
39280 	}
39281       else
39282 	{
39283 	  op2 = gen_reg_rtx (SImode);
39284 
39285 	  insn = (TARGET_64BIT
39286 		  ? gen_rdtscp_rex64 (op0, op1, op2)
39287 		  : gen_rdtscp (op0, op2));
39288 	  emit_insn (insn);
39289 
39290 	  arg0 = CALL_EXPR_ARG (exp, 0);
39291 	  op4 = expand_normal (arg0);
39292 	  if (!address_operand (op4, VOIDmode))
39293 	    {
39294 	      op4 = convert_memory_address (Pmode, op4);
39295 	      op4 = copy_addr_to_reg (op4);
39296 	    }
39297 	  emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
39298 	}
39299 
39300       if (target == 0)
39301 	{
39302 	  /* mode is VOIDmode if __builtin_rd* has been called
39303 	     without a lhs.  */
39304 	  if (mode == VOIDmode)
39305 	    return target;
39306 	  target = gen_reg_rtx (mode);
39307 	}
39308 
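      /* On 64-bit targets the *_rex64 patterns return the low and high
	 32 bits in op0 and op1 (EAX and EDX), so combine them into a
	 single DImode value: op0 |= op1 << 32.  */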
39309       if (TARGET_64BIT)
39310 	{
39311 	  op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
39312 				     op1, 1, OPTAB_DIRECT);
39313 	  op0 = expand_simple_binop (DImode, IOR, op0, op1,
39314 				     op0, 1, OPTAB_DIRECT);
39315 	}
39316 
39317       emit_move_insn (target, op0);
39318       return target;
39319 
39320     case IX86_BUILTIN_FXSAVE:
39321     case IX86_BUILTIN_FXRSTOR:
39322     case IX86_BUILTIN_FXSAVE64:
39323     case IX86_BUILTIN_FXRSTOR64:
39324     case IX86_BUILTIN_FNSTENV:
39325     case IX86_BUILTIN_FLDENV:
39326       mode0 = BLKmode;
39327       switch (fcode)
39328 	{
39329 	case IX86_BUILTIN_FXSAVE:
39330 	  icode = CODE_FOR_fxsave;
39331 	  break;
39332 	case IX86_BUILTIN_FXRSTOR:
39333 	  icode = CODE_FOR_fxrstor;
39334 	  break;
39335 	case IX86_BUILTIN_FXSAVE64:
39336 	  icode = CODE_FOR_fxsave64;
39337 	  break;
39338 	case IX86_BUILTIN_FXRSTOR64:
39339 	  icode = CODE_FOR_fxrstor64;
39340 	  break;
39341 	case IX86_BUILTIN_FNSTENV:
39342 	  icode = CODE_FOR_fnstenv;
39343 	  break;
39344 	case IX86_BUILTIN_FLDENV:
39345 	  icode = CODE_FOR_fldenv;
39346 	  break;
39347 	default:
39348 	  gcc_unreachable ();
39349 	}
39350 
39351       arg0 = CALL_EXPR_ARG (exp, 0);
39352       op0 = expand_normal (arg0);
39353 
39354       if (!address_operand (op0, VOIDmode))
39355 	{
39356 	  op0 = convert_memory_address (Pmode, op0);
39357 	  op0 = copy_addr_to_reg (op0);
39358 	}
39359       op0 = gen_rtx_MEM (mode0, op0);
39360 
39361       pat = GEN_FCN (icode) (op0);
39362       if (pat)
39363 	emit_insn (pat);
39364       return 0;
39365 
39366     case IX86_BUILTIN_XSAVE:
39367     case IX86_BUILTIN_XRSTOR:
39368     case IX86_BUILTIN_XSAVE64:
39369     case IX86_BUILTIN_XRSTOR64:
39370     case IX86_BUILTIN_XSAVEOPT:
39371     case IX86_BUILTIN_XSAVEOPT64:
39372     case IX86_BUILTIN_XSAVES:
39373     case IX86_BUILTIN_XRSTORS:
39374     case IX86_BUILTIN_XSAVES64:
39375     case IX86_BUILTIN_XRSTORS64:
39376     case IX86_BUILTIN_XSAVEC:
39377     case IX86_BUILTIN_XSAVEC64:
39378       arg0 = CALL_EXPR_ARG (exp, 0);
39379       arg1 = CALL_EXPR_ARG (exp, 1);
39380       op0 = expand_normal (arg0);
39381       op1 = expand_normal (arg1);
39382 
39383       if (!address_operand (op0, VOIDmode))
39384 	{
39385 	  op0 = convert_memory_address (Pmode, op0);
39386 	  op0 = copy_addr_to_reg (op0);
39387 	}
39388       op0 = gen_rtx_MEM (BLKmode, op0);
39389 
39390       op1 = force_reg (DImode, op1);
39391 
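      /* The xsave family takes a 64-bit state-component mask that the
	 hardware reads from EDX:EAX.  On TARGET_64BIT the DImode mask
	 is therefore split below into its SImode halves, roughly

	     lo = (unsigned int) mask;           for EAX
	     hi = (unsigned int) (mask >> 32);   for EDX  */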
39392       if (TARGET_64BIT)
39393 	{
39394 	  op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
39395 				     NULL, 1, OPTAB_DIRECT);
39396 	  switch (fcode)
39397 	    {
39398 	    case IX86_BUILTIN_XSAVE:
39399 	      icode = CODE_FOR_xsave_rex64;
39400 	      break;
39401 	    case IX86_BUILTIN_XRSTOR:
39402 	      icode = CODE_FOR_xrstor_rex64;
39403 	      break;
39404 	    case IX86_BUILTIN_XSAVE64:
39405 	      icode = CODE_FOR_xsave64;
39406 	      break;
39407 	    case IX86_BUILTIN_XRSTOR64:
39408 	      icode = CODE_FOR_xrstor64;
39409 	      break;
39410 	    case IX86_BUILTIN_XSAVEOPT:
39411 	      icode = CODE_FOR_xsaveopt_rex64;
39412 	      break;
39413 	    case IX86_BUILTIN_XSAVEOPT64:
39414 	      icode = CODE_FOR_xsaveopt64;
39415 	      break;
39416 	    case IX86_BUILTIN_XSAVES:
39417 	      icode = CODE_FOR_xsaves_rex64;
39418 	      break;
39419 	    case IX86_BUILTIN_XRSTORS:
39420 	      icode = CODE_FOR_xrstors_rex64;
39421 	      break;
39422 	    case IX86_BUILTIN_XSAVES64:
39423 	      icode = CODE_FOR_xsaves64;
39424 	      break;
39425 	    case IX86_BUILTIN_XRSTORS64:
39426 	      icode = CODE_FOR_xrstors64;
39427 	      break;
39428 	    case IX86_BUILTIN_XSAVEC:
39429 	      icode = CODE_FOR_xsavec_rex64;
39430 	      break;
39431 	    case IX86_BUILTIN_XSAVEC64:
39432 	      icode = CODE_FOR_xsavec64;
39433 	      break;
39434 	    default:
39435 	      gcc_unreachable ();
39436 	    }
39437 
39438 	  op2 = gen_lowpart (SImode, op2);
39439 	  op1 = gen_lowpart (SImode, op1);
39440 	  pat = GEN_FCN (icode) (op0, op1, op2);
39441 	}
39442       else
39443 	{
39444 	  switch (fcode)
39445 	    {
39446 	    case IX86_BUILTIN_XSAVE:
39447 	      icode = CODE_FOR_xsave;
39448 	      break;
39449 	    case IX86_BUILTIN_XRSTOR:
39450 	      icode = CODE_FOR_xrstor;
39451 	      break;
39452 	    case IX86_BUILTIN_XSAVEOPT:
39453 	      icode = CODE_FOR_xsaveopt;
39454 	      break;
39455 	    case IX86_BUILTIN_XSAVES:
39456 	      icode = CODE_FOR_xsaves;
39457 	      break;
39458 	    case IX86_BUILTIN_XRSTORS:
39459 	      icode = CODE_FOR_xrstors;
39460 	      break;
39461 	    case IX86_BUILTIN_XSAVEC:
39462 	      icode = CODE_FOR_xsavec;
39463 	      break;
39464 	    default:
39465 	      gcc_unreachable ();
39466 	    }
39467 	  pat = GEN_FCN (icode) (op0, op1);
39468 	}
39469 
39470       if (pat)
39471 	emit_insn (pat);
39472       return 0;
39473 
39474     case IX86_BUILTIN_LLWPCB:
39475       arg0 = CALL_EXPR_ARG (exp, 0);
39476       op0 = expand_normal (arg0);
39477       icode = CODE_FOR_lwp_llwpcb;
39478       if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39479 	op0 = ix86_zero_extend_to_Pmode (op0);
39480       emit_insn (gen_lwp_llwpcb (op0));
39481       return 0;
39482 
39483     case IX86_BUILTIN_SLWPCB:
39484       icode = CODE_FOR_lwp_slwpcb;
39485       if (!target
39486 	  || !insn_data[icode].operand[0].predicate (target, Pmode))
39487 	target = gen_reg_rtx (Pmode);
39488       emit_insn (gen_lwp_slwpcb (target));
39489       return target;
39490 
39491     case IX86_BUILTIN_BEXTRI32:
39492     case IX86_BUILTIN_BEXTRI64:
39493       arg0 = CALL_EXPR_ARG (exp, 0);
39494       arg1 = CALL_EXPR_ARG (exp, 1);
39495       op0 = expand_normal (arg0);
39496       op1 = expand_normal (arg1);
39497       icode = (fcode == IX86_BUILTIN_BEXTRI32
39498 	  ? CODE_FOR_tbm_bextri_si
39499 	  : CODE_FOR_tbm_bextri_di);
39500       if (!CONST_INT_P (op1))
39501         {
39502           error ("the last argument must be an immediate");
39503           return const0_rtx;
39504         }
39505       else
39506         {
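          /* The control byte packs the field length in bits 15:8 and
             the least significant bit index in bits 7:0; e.g.
             __builtin_ia32_bextri_u32 (x, (8 << 8) | 4) extracts the
             8 bits of X starting at bit 4.  */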
39507           unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39508           unsigned char lsb_index = INTVAL (op1) & 0xFF;
39509           op1 = GEN_INT (length);
39510           op2 = GEN_INT (lsb_index);
39511           pat = GEN_FCN (icode) (target, op0, op1, op2);
39512           if (pat)
39513             emit_insn (pat);
39514           return target;
39515         }
39516 
39517     case IX86_BUILTIN_RDRAND16_STEP:
39518       icode = CODE_FOR_rdrandhi_1;
39519       mode0 = HImode;
39520       goto rdrand_step;
39521 
39522     case IX86_BUILTIN_RDRAND32_STEP:
39523       icode = CODE_FOR_rdrandsi_1;
39524       mode0 = SImode;
39525       goto rdrand_step;
39526 
39527     case IX86_BUILTIN_RDRAND64_STEP:
39528       icode = CODE_FOR_rdranddi_1;
39529       mode0 = DImode;
39530 
39531 rdrand_step:
39532       arg0 = CALL_EXPR_ARG (exp, 0);
39533       op1 = expand_normal (arg0);
39534       if (!address_operand (op1, VOIDmode))
39535 	{
39536 	  op1 = convert_memory_address (Pmode, op1);
39537 	  op1 = copy_addr_to_reg (op1);
39538 	}
39539 
39540       op0 = gen_reg_rtx (mode0);
39541       emit_insn (GEN_FCN (icode) (op0));
39542 
39543       emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39544 
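      /* The *_step builtins store the random value through the pointer
	 argument and return nonzero iff the hardware reported success
	 (CF set), e.g. (assuming the usual intrinsic mapping):

	     unsigned int r;
	     if (__builtin_ia32_rdrand32_step (&r))
	       ... use r ...

	 Since rdrand zeroes its destination on failure, the conditional
	 move emitted below selects the constant 1 on success and the
	 (zero) stored value otherwise.  */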
39545       op1 = gen_reg_rtx (SImode);
39546       emit_move_insn (op1, CONST1_RTX (SImode));
39547 
39548       /* Emit SImode conditional move.  */
39549       if (mode0 == HImode)
39550 	{
39551 	  if (TARGET_ZERO_EXTEND_WITH_AND
39552 	      && optimize_function_for_speed_p (cfun))
39553 	    {
39554 	      op2 = force_reg (SImode, const0_rtx);
39555 
39556 	      emit_insn (gen_movstricthi
39557 			 (gen_lowpart (HImode, op2), op0));
39558 	    }
39559 	  else
39560 	    {
39561 	      op2 = gen_reg_rtx (SImode);
39562 
39563 	      emit_insn (gen_zero_extendhisi2 (op2, op0));
39564 	    }
39565 	}
39566       else if (mode0 == SImode)
39567 	op2 = op0;
39568       else
39569 	op2 = gen_rtx_SUBREG (SImode, op0, 0);
39570 
39571       if (target == 0
39572 	  || !register_operand (target, SImode))
39573 	target = gen_reg_rtx (SImode);
39574 
39575       pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39576 			 const0_rtx);
39577       emit_insn (gen_rtx_SET (VOIDmode, target,
39578 			      gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39579       return target;
39580 
39581     case IX86_BUILTIN_RDSEED16_STEP:
39582       icode = CODE_FOR_rdseedhi_1;
39583       mode0 = HImode;
39584       goto rdseed_step;
39585 
39586     case IX86_BUILTIN_RDSEED32_STEP:
39587       icode = CODE_FOR_rdseedsi_1;
39588       mode0 = SImode;
39589       goto rdseed_step;
39590 
39591     case IX86_BUILTIN_RDSEED64_STEP:
39592       icode = CODE_FOR_rdseeddi_1;
39593       mode0 = DImode;
39594 
39595 rdseed_step:
39596       arg0 = CALL_EXPR_ARG (exp, 0);
39597       op1 = expand_normal (arg0);
39598       if (!address_operand (op1, VOIDmode))
39599 	{
39600 	  op1 = convert_memory_address (Pmode, op1);
39601 	  op1 = copy_addr_to_reg (op1);
39602 	}
39603 
39604       op0 = gen_reg_rtx (mode0);
39605       emit_insn (GEN_FCN (icode) (op0));
39606 
39607       emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39608 
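      /* Unlike the rdrand case above, the carry flag is captured here
	 with a setcc-style SET into a QImode register and then
	 zero-extended, so the builtin likewise returns 1 on success
	 and 0 on failure.  */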
39609       op2 = gen_reg_rtx (QImode);
39610 
39611       pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39612                          const0_rtx);
39613       emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
39614 
39615       if (target == 0
39616 	  || !register_operand (target, SImode))
39617         target = gen_reg_rtx (SImode);
39618 
39619       emit_insn (gen_zero_extendqisi2 (target, op2));
39620       return target;
39621 
39622     case IX86_BUILTIN_SBB32:
39623       icode = CODE_FOR_subborrowsi;
39624       mode0 = SImode;
39625       goto handlecarry;
39626 
39627     case IX86_BUILTIN_SBB64:
39628       icode = CODE_FOR_subborrowdi;
39629       mode0 = DImode;
39630       goto handlecarry;
39631 
39632     case IX86_BUILTIN_ADDCARRYX32:
39633       icode = CODE_FOR_addcarrysi;
39634       mode0 = SImode;
39635       goto handlecarry;
39636 
39637     case IX86_BUILTIN_ADDCARRYX64:
39638       icode = CODE_FOR_addcarrydi;
39639       mode0 = DImode;
39640 
39641     handlecarry:
39642       arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in.  */
39643       arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1.  */
39644       arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2.  */
39645       arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out.  */
39646 
39647       op1 = expand_normal (arg0);
39648       op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39649 
39650       op2 = expand_normal (arg1);
39651       if (!register_operand (op2, mode0))
39652 	op2 = copy_to_mode_reg (mode0, op2);
39653 
39654       op3 = expand_normal (arg2);
39655       if (!register_operand (op3, mode0))
39656 	op3 = copy_to_mode_reg (mode0, op3);
39657 
39658       op4 = expand_normal (arg3);
39659       if (!address_operand (op4, VOIDmode))
39660 	{
39661 	  op4 = convert_memory_address (Pmode, op4);
39662 	  op4 = copy_addr_to_reg (op4);
39663 	}
39664 
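      /* A sketch of the semantics, assuming the usual intrinsic
	 mapping:

	     unsigned int sum;
	     unsigned char c_out
	       = __builtin_ia32_addcarryx_u32 (c_in, a, b, &sum);

	 computes sum = a + b + (c_in != 0) and returns the carry out.
	 Adding 0xff to the QImode C_IN below sets CF exactly when C_IN
	 is nonzero, recreating the incoming carry.  */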
39665       /* Generate CF from input operand.  */
39666       emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
39667 
39668       /* Generate instruction that consumes CF.  */
39669       op0 = gen_reg_rtx (mode0);
39670 
39671       op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
39672       pat = gen_rtx_LTU (mode0, op1, const0_rtx);
39673       emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat));
39674 
39675       /* Return current CF value.  */
39676       if (target == 0)
39677         target = gen_reg_rtx (QImode);
39678 
39679       PUT_MODE (pat, QImode);
39680       emit_insn (gen_rtx_SET (VOIDmode, target, pat));
39681 
39682       /* Store the result.  */
39683       emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39684 
39685       return target;
39686 
39687     case IX86_BUILTIN_READ_FLAGS:
39688       emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39689 
39690       if (optimize
39691 	  || target == NULL_RTX
39692 	  || !nonimmediate_operand (target, word_mode)
39693 	  || GET_MODE (target) != word_mode)
39694 	target = gen_reg_rtx (word_mode);
39695 
39696       emit_insn (gen_pop (target));
39697       return target;
39698 
39699     case IX86_BUILTIN_WRITE_FLAGS:
39700 
39701       arg0 = CALL_EXPR_ARG (exp, 0);
39702       op0 = expand_normal (arg0);
39703       if (!general_no_elim_operand (op0, word_mode))
39704 	op0 = copy_to_mode_reg (word_mode, op0);
39705 
39706       emit_insn (gen_push (op0));
39707       emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39708       return 0;
39709 
39710     case IX86_BUILTIN_KORTESTC16:
39711       icode = CODE_FOR_kortestchi;
39712       mode0 = HImode;
39713       mode1 = CCCmode;
39714       goto kortest;
39715 
39716     case IX86_BUILTIN_KORTESTZ16:
39717       icode = CODE_FOR_kortestzhi;
39718       mode0 = HImode;
39719       mode1 = CCZmode;
39720 
39721     kortest:
39722       arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1.  */
39723       arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2.  */
39724       op0 = expand_normal (arg0);
39725       op1 = expand_normal (arg1);
39726 
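      /* kortest ORs the two mask registers and sets the flags; e.g.
	 (assuming the usual intrinsic mapping)
	 __builtin_ia32_kortestchi (m1, m2) returns nonzero when the OR
	 of the two 16-bit masks is all ones (CF set), and the kortestz
	 variant when it is zero (ZF set).  */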
39727       op0 = copy_to_reg (op0);
39728       op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39729       op1 = copy_to_reg (op1);
39730       op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39731 
39732       target = gen_reg_rtx (QImode);
39733       emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
39734 
39735       /* Emit kortest.  */
39736       emit_insn (GEN_FCN (icode) (op0, op1));
39737       /* And use setcc to return result from flags.  */
39738       ix86_expand_setcc (target, EQ,
39739 			 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39740       return target;
39741 
39742     case IX86_BUILTIN_GATHERSIV2DF:
39743       icode = CODE_FOR_avx2_gathersiv2df;
39744       goto gather_gen;
39745     case IX86_BUILTIN_GATHERSIV4DF:
39746       icode = CODE_FOR_avx2_gathersiv4df;
39747       goto gather_gen;
39748     case IX86_BUILTIN_GATHERDIV2DF:
39749       icode = CODE_FOR_avx2_gatherdiv2df;
39750       goto gather_gen;
39751     case IX86_BUILTIN_GATHERDIV4DF:
39752       icode = CODE_FOR_avx2_gatherdiv4df;
39753       goto gather_gen;
39754     case IX86_BUILTIN_GATHERSIV4SF:
39755       icode = CODE_FOR_avx2_gathersiv4sf;
39756       goto gather_gen;
39757     case IX86_BUILTIN_GATHERSIV8SF:
39758       icode = CODE_FOR_avx2_gathersiv8sf;
39759       goto gather_gen;
39760     case IX86_BUILTIN_GATHERDIV4SF:
39761       icode = CODE_FOR_avx2_gatherdiv4sf;
39762       goto gather_gen;
39763     case IX86_BUILTIN_GATHERDIV8SF:
39764       icode = CODE_FOR_avx2_gatherdiv8sf;
39765       goto gather_gen;
39766     case IX86_BUILTIN_GATHERSIV2DI:
39767       icode = CODE_FOR_avx2_gathersiv2di;
39768       goto gather_gen;
39769     case IX86_BUILTIN_GATHERSIV4DI:
39770       icode = CODE_FOR_avx2_gathersiv4di;
39771       goto gather_gen;
39772     case IX86_BUILTIN_GATHERDIV2DI:
39773       icode = CODE_FOR_avx2_gatherdiv2di;
39774       goto gather_gen;
39775     case IX86_BUILTIN_GATHERDIV4DI:
39776       icode = CODE_FOR_avx2_gatherdiv4di;
39777       goto gather_gen;
39778     case IX86_BUILTIN_GATHERSIV4SI:
39779       icode = CODE_FOR_avx2_gathersiv4si;
39780       goto gather_gen;
39781     case IX86_BUILTIN_GATHERSIV8SI:
39782       icode = CODE_FOR_avx2_gathersiv8si;
39783       goto gather_gen;
39784     case IX86_BUILTIN_GATHERDIV4SI:
39785       icode = CODE_FOR_avx2_gatherdiv4si;
39786       goto gather_gen;
39787     case IX86_BUILTIN_GATHERDIV8SI:
39788       icode = CODE_FOR_avx2_gatherdiv8si;
39789       goto gather_gen;
39790     case IX86_BUILTIN_GATHERALTSIV4DF:
39791       icode = CODE_FOR_avx2_gathersiv4df;
39792       goto gather_gen;
39793     case IX86_BUILTIN_GATHERALTDIV8SF:
39794       icode = CODE_FOR_avx2_gatherdiv8sf;
39795       goto gather_gen;
39796     case IX86_BUILTIN_GATHERALTSIV4DI:
39797       icode = CODE_FOR_avx2_gathersiv4di;
39798       goto gather_gen;
39799     case IX86_BUILTIN_GATHERALTDIV8SI:
39800       icode = CODE_FOR_avx2_gatherdiv8si;
39801       goto gather_gen;
39802     case IX86_BUILTIN_GATHER3SIV16SF:
39803       icode = CODE_FOR_avx512f_gathersiv16sf;
39804       goto gather_gen;
39805     case IX86_BUILTIN_GATHER3SIV8DF:
39806       icode = CODE_FOR_avx512f_gathersiv8df;
39807       goto gather_gen;
39808     case IX86_BUILTIN_GATHER3DIV16SF:
39809       icode = CODE_FOR_avx512f_gatherdiv16sf;
39810       goto gather_gen;
39811     case IX86_BUILTIN_GATHER3DIV8DF:
39812       icode = CODE_FOR_avx512f_gatherdiv8df;
39813       goto gather_gen;
39814     case IX86_BUILTIN_GATHER3SIV16SI:
39815       icode = CODE_FOR_avx512f_gathersiv16si;
39816       goto gather_gen;
39817     case IX86_BUILTIN_GATHER3SIV8DI:
39818       icode = CODE_FOR_avx512f_gathersiv8di;
39819       goto gather_gen;
39820     case IX86_BUILTIN_GATHER3DIV16SI:
39821       icode = CODE_FOR_avx512f_gatherdiv16si;
39822       goto gather_gen;
39823     case IX86_BUILTIN_GATHER3DIV8DI:
39824       icode = CODE_FOR_avx512f_gatherdiv8di;
39825       goto gather_gen;
39826     case IX86_BUILTIN_GATHER3ALTSIV8DF:
39827       icode = CODE_FOR_avx512f_gathersiv8df;
39828       goto gather_gen;
39829     case IX86_BUILTIN_GATHER3ALTDIV16SF:
39830       icode = CODE_FOR_avx512f_gatherdiv16sf;
39831       goto gather_gen;
39832     case IX86_BUILTIN_GATHER3ALTSIV8DI:
39833       icode = CODE_FOR_avx512f_gathersiv8di;
39834       goto gather_gen;
39835     case IX86_BUILTIN_GATHER3ALTDIV16SI:
39836       icode = CODE_FOR_avx512f_gatherdiv16si;
39837       goto gather_gen;
39838     case IX86_BUILTIN_GATHER3SIV2DF:
39839       icode = CODE_FOR_avx512vl_gathersiv2df;
39840       goto gather_gen;
39841     case IX86_BUILTIN_GATHER3SIV4DF:
39842       icode = CODE_FOR_avx512vl_gathersiv4df;
39843       goto gather_gen;
39844     case IX86_BUILTIN_GATHER3DIV2DF:
39845       icode = CODE_FOR_avx512vl_gatherdiv2df;
39846       goto gather_gen;
39847     case IX86_BUILTIN_GATHER3DIV4DF:
39848       icode = CODE_FOR_avx512vl_gatherdiv4df;
39849       goto gather_gen;
39850     case IX86_BUILTIN_GATHER3SIV4SF:
39851       icode = CODE_FOR_avx512vl_gathersiv4sf;
39852       goto gather_gen;
39853     case IX86_BUILTIN_GATHER3SIV8SF:
39854       icode = CODE_FOR_avx512vl_gathersiv8sf;
39855       goto gather_gen;
39856     case IX86_BUILTIN_GATHER3DIV4SF:
39857       icode = CODE_FOR_avx512vl_gatherdiv4sf;
39858       goto gather_gen;
39859     case IX86_BUILTIN_GATHER3DIV8SF:
39860       icode = CODE_FOR_avx512vl_gatherdiv8sf;
39861       goto gather_gen;
39862     case IX86_BUILTIN_GATHER3SIV2DI:
39863       icode = CODE_FOR_avx512vl_gathersiv2di;
39864       goto gather_gen;
39865     case IX86_BUILTIN_GATHER3SIV4DI:
39866       icode = CODE_FOR_avx512vl_gathersiv4di;
39867       goto gather_gen;
39868     case IX86_BUILTIN_GATHER3DIV2DI:
39869       icode = CODE_FOR_avx512vl_gatherdiv2di;
39870       goto gather_gen;
39871     case IX86_BUILTIN_GATHER3DIV4DI:
39872       icode = CODE_FOR_avx512vl_gatherdiv4di;
39873       goto gather_gen;
39874     case IX86_BUILTIN_GATHER3SIV4SI:
39875       icode = CODE_FOR_avx512vl_gathersiv4si;
39876       goto gather_gen;
39877     case IX86_BUILTIN_GATHER3SIV8SI:
39878       icode = CODE_FOR_avx512vl_gathersiv8si;
39879       goto gather_gen;
39880     case IX86_BUILTIN_GATHER3DIV4SI:
39881       icode = CODE_FOR_avx512vl_gatherdiv4si;
39882       goto gather_gen;
39883     case IX86_BUILTIN_GATHER3DIV8SI:
39884       icode = CODE_FOR_avx512vl_gatherdiv8si;
39885       goto gather_gen;
39886     case IX86_BUILTIN_GATHER3ALTSIV4DF:
39887       icode = CODE_FOR_avx512vl_gathersiv4df;
39888       goto gather_gen;
39889     case IX86_BUILTIN_GATHER3ALTDIV8SF:
39890       icode = CODE_FOR_avx512vl_gatherdiv8sf;
39891       goto gather_gen;
39892     case IX86_BUILTIN_GATHER3ALTSIV4DI:
39893       icode = CODE_FOR_avx512vl_gathersiv4di;
39894       goto gather_gen;
39895     case IX86_BUILTIN_GATHER3ALTDIV8SI:
39896       icode = CODE_FOR_avx512vl_gatherdiv8si;
39897       goto gather_gen;
39898     case IX86_BUILTIN_SCATTERSIV16SF:
39899       icode = CODE_FOR_avx512f_scattersiv16sf;
39900       goto scatter_gen;
39901     case IX86_BUILTIN_SCATTERSIV8DF:
39902       icode = CODE_FOR_avx512f_scattersiv8df;
39903       goto scatter_gen;
39904     case IX86_BUILTIN_SCATTERDIV16SF:
39905       icode = CODE_FOR_avx512f_scatterdiv16sf;
39906       goto scatter_gen;
39907     case IX86_BUILTIN_SCATTERDIV8DF:
39908       icode = CODE_FOR_avx512f_scatterdiv8df;
39909       goto scatter_gen;
39910     case IX86_BUILTIN_SCATTERSIV16SI:
39911       icode = CODE_FOR_avx512f_scattersiv16si;
39912       goto scatter_gen;
39913     case IX86_BUILTIN_SCATTERSIV8DI:
39914       icode = CODE_FOR_avx512f_scattersiv8di;
39915       goto scatter_gen;
39916     case IX86_BUILTIN_SCATTERDIV16SI:
39917       icode = CODE_FOR_avx512f_scatterdiv16si;
39918       goto scatter_gen;
39919     case IX86_BUILTIN_SCATTERDIV8DI:
39920       icode = CODE_FOR_avx512f_scatterdiv8di;
39921       goto scatter_gen;
39922     case IX86_BUILTIN_SCATTERSIV8SF:
39923       icode = CODE_FOR_avx512vl_scattersiv8sf;
39924       goto scatter_gen;
39925     case IX86_BUILTIN_SCATTERSIV4SF:
39926       icode = CODE_FOR_avx512vl_scattersiv4sf;
39927       goto scatter_gen;
39928     case IX86_BUILTIN_SCATTERSIV4DF:
39929       icode = CODE_FOR_avx512vl_scattersiv4df;
39930       goto scatter_gen;
39931     case IX86_BUILTIN_SCATTERSIV2DF:
39932       icode = CODE_FOR_avx512vl_scattersiv2df;
39933       goto scatter_gen;
39934     case IX86_BUILTIN_SCATTERDIV8SF:
39935       icode = CODE_FOR_avx512vl_scatterdiv8sf;
39936       goto scatter_gen;
39937     case IX86_BUILTIN_SCATTERDIV4SF:
39938       icode = CODE_FOR_avx512vl_scatterdiv4sf;
39939       goto scatter_gen;
39940     case IX86_BUILTIN_SCATTERDIV4DF:
39941       icode = CODE_FOR_avx512vl_scatterdiv4df;
39942       goto scatter_gen;
39943     case IX86_BUILTIN_SCATTERDIV2DF:
39944       icode = CODE_FOR_avx512vl_scatterdiv2df;
39945       goto scatter_gen;
39946     case IX86_BUILTIN_SCATTERSIV8SI:
39947       icode = CODE_FOR_avx512vl_scattersiv8si;
39948       goto scatter_gen;
39949     case IX86_BUILTIN_SCATTERSIV4SI:
39950       icode = CODE_FOR_avx512vl_scattersiv4si;
39951       goto scatter_gen;
39952     case IX86_BUILTIN_SCATTERSIV4DI:
39953       icode = CODE_FOR_avx512vl_scattersiv4di;
39954       goto scatter_gen;
39955     case IX86_BUILTIN_SCATTERSIV2DI:
39956       icode = CODE_FOR_avx512vl_scattersiv2di;
39957       goto scatter_gen;
39958     case IX86_BUILTIN_SCATTERDIV8SI:
39959       icode = CODE_FOR_avx512vl_scatterdiv8si;
39960       goto scatter_gen;
39961     case IX86_BUILTIN_SCATTERDIV4SI:
39962       icode = CODE_FOR_avx512vl_scatterdiv4si;
39963       goto scatter_gen;
39964     case IX86_BUILTIN_SCATTERDIV4DI:
39965       icode = CODE_FOR_avx512vl_scatterdiv4di;
39966       goto scatter_gen;
39967     case IX86_BUILTIN_SCATTERDIV2DI:
39968       icode = CODE_FOR_avx512vl_scatterdiv2di;
39969       goto scatter_gen;
39970     case IX86_BUILTIN_GATHERPFDPD:
39971       icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39972       goto vec_prefetch_gen;
39973     case IX86_BUILTIN_GATHERPFDPS:
39974       icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39975       goto vec_prefetch_gen;
39976     case IX86_BUILTIN_GATHERPFQPD:
39977       icode = CODE_FOR_avx512pf_gatherpfv8didf;
39978       goto vec_prefetch_gen;
39979     case IX86_BUILTIN_GATHERPFQPS:
39980       icode = CODE_FOR_avx512pf_gatherpfv8disf;
39981       goto vec_prefetch_gen;
39982     case IX86_BUILTIN_SCATTERPFDPD:
39983       icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39984       goto vec_prefetch_gen;
39985     case IX86_BUILTIN_SCATTERPFDPS:
39986       icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39987       goto vec_prefetch_gen;
39988     case IX86_BUILTIN_SCATTERPFQPD:
39989       icode = CODE_FOR_avx512pf_scatterpfv8didf;
39990       goto vec_prefetch_gen;
39991     case IX86_BUILTIN_SCATTERPFQPS:
39992       icode = CODE_FOR_avx512pf_scatterpfv8disf;
39993       goto vec_prefetch_gen;
39994 
39995     gather_gen:
39996       rtx half;
39997       rtx (*gen) (rtx, rtx);
39998 
39999       arg0 = CALL_EXPR_ARG (exp, 0);
40000       arg1 = CALL_EXPR_ARG (exp, 1);
40001       arg2 = CALL_EXPR_ARG (exp, 2);
40002       arg3 = CALL_EXPR_ARG (exp, 3);
40003       arg4 = CALL_EXPR_ARG (exp, 4);
40004       op0 = expand_normal (arg0);
40005       op1 = expand_normal (arg1);
40006       op2 = expand_normal (arg2);
40007       op3 = expand_normal (arg3);
40008       op4 = expand_normal (arg4);
40009       /* Note the arg order is different from the operand order.  */
40010       mode0 = insn_data[icode].operand[1].mode;
40011       mode2 = insn_data[icode].operand[3].mode;
40012       mode3 = insn_data[icode].operand[4].mode;
40013       mode4 = insn_data[icode].operand[5].mode;
40014 
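      /* Roughly, the gather patterns lay their operands out as

	     0: destination vector
	     1: merge source (previous destination contents)
	     2: base address
	     3: index vector
	     4: mask
	     5: scale (1, 2, 4 or 8)

	 while the builtin arguments arrive as (src, base, index, mask,
	 scale); hence the shifted operand indices above.  */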
40015       if (target == NULL_RTX
40016 	  || GET_MODE (target) != insn_data[icode].operand[0].mode
40017 	  || !insn_data[icode].operand[0].predicate (target,
40018 						     GET_MODE (target)))
40019 	subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
40020       else
40021 	subtarget = target;
40022 
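      /* The "ALT" variants pair an index vector with twice as many
	 elements as the data vector, so only the low half of the index
	 (or, for the DIV forms, of the source and mask) is meaningful
	 and is extracted first.  */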
40023       switch (fcode)
40024 	{
40025 	case IX86_BUILTIN_GATHER3ALTSIV8DF:
40026 	case IX86_BUILTIN_GATHER3ALTSIV8DI:
40027 	  half = gen_reg_rtx (V8SImode);
40028 	  if (!nonimmediate_operand (op2, V16SImode))
40029 	    op2 = copy_to_mode_reg (V16SImode, op2);
40030 	  emit_insn (gen_vec_extract_lo_v16si (half, op2));
40031 	  op2 = half;
40032 	  break;
40033 	case IX86_BUILTIN_GATHER3ALTSIV4DF:
40034 	case IX86_BUILTIN_GATHER3ALTSIV4DI:
40035 	case IX86_BUILTIN_GATHERALTSIV4DF:
40036 	case IX86_BUILTIN_GATHERALTSIV4DI:
40037 	  half = gen_reg_rtx (V4SImode);
40038 	  if (!nonimmediate_operand (op2, V8SImode))
40039 	    op2 = copy_to_mode_reg (V8SImode, op2);
40040 	  emit_insn (gen_vec_extract_lo_v8si (half, op2));
40041 	  op2 = half;
40042 	  break;
40043 	case IX86_BUILTIN_GATHER3ALTDIV16SF:
40044 	case IX86_BUILTIN_GATHER3ALTDIV16SI:
40045 	  half = gen_reg_rtx (mode0);
40046 	  if (mode0 == V8SFmode)
40047 	    gen = gen_vec_extract_lo_v16sf;
40048 	  else
40049 	    gen = gen_vec_extract_lo_v16si;
40050 	  if (!nonimmediate_operand (op0, GET_MODE (op0)))
40051 	    op0 = copy_to_mode_reg (GET_MODE (op0), op0);
40052 	  emit_insn (gen (half, op0));
40053 	  op0 = half;
40054 	  if (GET_MODE (op3) != VOIDmode)
40055 	    {
40056 	      if (!nonimmediate_operand (op3, GET_MODE (op3)))
40057 		op3 = copy_to_mode_reg (GET_MODE (op3), op3);
40058 	      emit_insn (gen (half, op3));
40059 	      op3 = half;
40060 	    }
40061 	  break;
40062 	case IX86_BUILTIN_GATHER3ALTDIV8SF:
40063 	case IX86_BUILTIN_GATHER3ALTDIV8SI:
40064 	case IX86_BUILTIN_GATHERALTDIV8SF:
40065 	case IX86_BUILTIN_GATHERALTDIV8SI:
40066 	  half = gen_reg_rtx (mode0);
40067 	  if (mode0 == V4SFmode)
40068 	    gen = gen_vec_extract_lo_v8sf;
40069 	  else
40070 	    gen = gen_vec_extract_lo_v8si;
40071 	  if (!nonimmediate_operand (op0, GET_MODE (op0)))
40072 	    op0 = copy_to_mode_reg (GET_MODE (op0), op0);
40073 	  emit_insn (gen (half, op0));
40074 	  op0 = half;
40075 	  if (GET_MODE (op3) != VOIDmode)
40076 	    {
40077 	      if (!nonimmediate_operand (op3, GET_MODE (op3)))
40078 		op3 = copy_to_mode_reg (GET_MODE (op3), op3);
40079 	      emit_insn (gen (half, op3));
40080 	      op3 = half;
40081 	    }
40082 	  break;
40083 	default:
40084 	  break;
40085 	}
40086 
40087       /* Force the memory operand to use only a base register here.
40088 	 We don't want to do this for the memory operands of other
40089 	 builtin functions.  */
40090       op1 = ix86_zero_extend_to_Pmode (op1);
40091 
40092       if (!insn_data[icode].operand[1].predicate (op0, mode0))
40093 	op0 = copy_to_mode_reg (mode0, op0);
40094       if (!insn_data[icode].operand[2].predicate (op1, Pmode))
40095 	op1 = copy_to_mode_reg (Pmode, op1);
40096       if (!insn_data[icode].operand[3].predicate (op2, mode2))
40097 	op2 = copy_to_mode_reg (mode2, op2);
40098 
40099       op3 = fixup_modeless_constant (op3, mode3);
40100 
40101       if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
40102 	{
40103 	  if (!insn_data[icode].operand[4].predicate (op3, mode3))
40104 	    op3 = copy_to_mode_reg (mode3, op3);
40105 	}
40106       else
40107 	{
40108 	  op3 = copy_to_reg (op3);
40109 	  op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
40110 	}
40111       if (!insn_data[icode].operand[5].predicate (op4, mode4))
40112 	{
40113           error ("the last argument must be scale 1, 2, 4, 8");
40114           return const0_rtx;
40115 	}
40116 
40117       /* Optimize.  If mask is known to have all high bits set,
40118 	 replace op0 with pc_rtx to signal that the instruction
40119 	 overwrites the whole destination and doesn't use its
40120 	 previous contents.  */
40121       if (optimize)
40122 	{
40123 	  if (TREE_CODE (arg3) == INTEGER_CST)
40124 	    {
40125 	      if (integer_all_onesp (arg3))
40126 		op0 = pc_rtx;
40127 	    }
40128 	  else if (TREE_CODE (arg3) == VECTOR_CST)
40129 	    {
40130 	      unsigned int negative = 0;
40131 	      for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
40132 		{
40133 		  tree cst = VECTOR_CST_ELT (arg3, i);
40134 		  if (TREE_CODE (cst) == INTEGER_CST
40135 		      && tree_int_cst_sign_bit (cst))
40136 		    negative++;
40137 		  else if (TREE_CODE (cst) == REAL_CST
40138 			   && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
40139 		    negative++;
40140 		}
40141 	      if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
40142 		op0 = pc_rtx;
40143 	    }
40144 	  else if (TREE_CODE (arg3) == SSA_NAME
40145 		   && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
40146 	    {
40147 	      /* Recognize also when mask is like:
40148 		 __v2df src = _mm_setzero_pd ();
40149 		 __v2df mask = _mm_cmpeq_pd (src, src);
40150 		 or
40151 		 __v8sf src = _mm256_setzero_ps ();
40152 		 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
40153 		 as that is a cheaper way to load all ones into
40154 		 a register than having to load a constant from
40155 		 memory.  */
40156 	      gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
40157 	      if (is_gimple_call (def_stmt))
40158 		{
40159 		  tree fndecl = gimple_call_fndecl (def_stmt);
40160 		  if (fndecl
40161 		      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
40162 		    switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
40163 		      {
40164 		      case IX86_BUILTIN_CMPPD:
40165 		      case IX86_BUILTIN_CMPPS:
40166 		      case IX86_BUILTIN_CMPPD256:
40167 		      case IX86_BUILTIN_CMPPS256:
40168 			if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
40169 			  break;
40170 			/* FALLTHRU */
40171 		      case IX86_BUILTIN_CMPEQPD:
40172 		      case IX86_BUILTIN_CMPEQPS:
40173 			if (initializer_zerop (gimple_call_arg (def_stmt, 0))
40174 			    && initializer_zerop (gimple_call_arg (def_stmt,
40175 								   1)))
40176 			  op0 = pc_rtx;
40177 			break;
40178 		      default:
40179 			break;
40180 		      }
40181 		}
40182 	    }
40183 	}
40184 
40185       pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
40186       if (! pat)
40187 	return const0_rtx;
40188       emit_insn (pat);
40189 
40190       switch (fcode)
40191 	{
40192 	case IX86_BUILTIN_GATHER3DIV16SF:
40193 	  if (target == NULL_RTX)
40194 	    target = gen_reg_rtx (V8SFmode);
40195 	  emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
40196 	  break;
40197 	case IX86_BUILTIN_GATHER3DIV16SI:
40198 	  if (target == NULL_RTX)
40199 	    target = gen_reg_rtx (V8SImode);
40200 	  emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
40201 	  break;
40202 	case IX86_BUILTIN_GATHER3DIV8SF:
40203 	case IX86_BUILTIN_GATHERDIV8SF:
40204 	  if (target == NULL_RTX)
40205 	    target = gen_reg_rtx (V4SFmode);
40206 	  emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
40207 	  break;
40208 	case IX86_BUILTIN_GATHER3DIV8SI:
40209 	case IX86_BUILTIN_GATHERDIV8SI:
40210 	  if (target == NULL_RTX)
40211 	    target = gen_reg_rtx (V4SImode);
40212 	  emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
40213 	  break;
40214 	default:
40215 	  target = subtarget;
40216 	  break;
40217 	}
40218       return target;
40219 
40220     scatter_gen:
40221       arg0 = CALL_EXPR_ARG (exp, 0);
40222       arg1 = CALL_EXPR_ARG (exp, 1);
40223       arg2 = CALL_EXPR_ARG (exp, 2);
40224       arg3 = CALL_EXPR_ARG (exp, 3);
40225       arg4 = CALL_EXPR_ARG (exp, 4);
40226       op0 = expand_normal (arg0);
40227       op1 = expand_normal (arg1);
40228       op2 = expand_normal (arg2);
40229       op3 = expand_normal (arg3);
40230       op4 = expand_normal (arg4);
40231       mode1 = insn_data[icode].operand[1].mode;
40232       mode2 = insn_data[icode].operand[2].mode;
40233       mode3 = insn_data[icode].operand[3].mode;
40234       mode4 = insn_data[icode].operand[4].mode;
40235 
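      /* Scatter patterns take (base, mask, index, source, scale) as
	 operands 0-4: op0 is the base address, op1 the write mask,
	 op2 the index vector, op3 the data to store and op4 the
	 scale.  */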
40236       /* Force the memory operand to use only a base register here.
40237 	 We don't want to do this for the memory operands of other
40238 	 builtin functions.  */
40239       op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
40240 
40241       if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40242 	op0 = copy_to_mode_reg (Pmode, op0);
40243 
40244       op1 = fixup_modeless_constant (op1, mode1);
40245 
40246       if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
40247 	{
40248 	  if (!insn_data[icode].operand[1].predicate (op1, mode1))
40249 	    op1 = copy_to_mode_reg (mode1, op1);
40250 	}
40251       else
40252 	{
40253 	  op1 = copy_to_reg (op1);
40254 	  op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
40255 	}
40256 
40257       if (!insn_data[icode].operand[2].predicate (op2, mode2))
40258 	op2 = copy_to_mode_reg (mode2, op2);
40259 
40260       if (!insn_data[icode].operand[3].predicate (op3, mode3))
40261 	op3 = copy_to_mode_reg (mode3, op3);
40262 
40263       if (!insn_data[icode].operand[4].predicate (op4, mode4))
40264 	{
40265 	  error ("the last argument must be scale 1, 2, 4, 8");
40266 	  return const0_rtx;
40267 	}
40268 
40269       pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40270       if (! pat)
40271 	return const0_rtx;
40272 
40273       emit_insn (pat);
40274       return 0;
40275 
40276     vec_prefetch_gen:
40277       arg0 = CALL_EXPR_ARG (exp, 0);
40278       arg1 = CALL_EXPR_ARG (exp, 1);
40279       arg2 = CALL_EXPR_ARG (exp, 2);
40280       arg3 = CALL_EXPR_ARG (exp, 3);
40281       arg4 = CALL_EXPR_ARG (exp, 4);
40282       op0 = expand_normal (arg0);
40283       op1 = expand_normal (arg1);
40284       op2 = expand_normal (arg2);
40285       op3 = expand_normal (arg3);
40286       op4 = expand_normal (arg4);
40287       mode0 = insn_data[icode].operand[0].mode;
40288       mode1 = insn_data[icode].operand[1].mode;
40289       mode3 = insn_data[icode].operand[3].mode;
40290       mode4 = insn_data[icode].operand[4].mode;
40291 
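      /* For the prefetch patterns op0 is the mask, op1 the index
	 vector, op2 the base address, op3 the scale and op4 the
	 locality hint; e.g. (assuming the usual intrinsic mapping)
	 _mm512_prefetch_i32gather_ps expands to
	 __builtin_ia32_gatherpfdps with the hint selecting the cache
	 level to prefetch into.  */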
40292       op0 = fixup_modeless_constant (op0, mode0);
40293 
40294       if (GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
40295 	{
40296 	  if (!insn_data[icode].operand[0].predicate (op0, mode0))
40297 	    op0 = copy_to_mode_reg (mode0, op0);
40298 	}
40299       else
40300 	{
40301 	  op0 = copy_to_reg (op0);
40302 	  op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
40303 	}
40304 
40305       if (!insn_data[icode].operand[1].predicate (op1, mode1))
40306 	op1 = copy_to_mode_reg (mode1, op1);
40307 
40308       /* Force the memory operand to use only a base register here.
40309 	 We don't want to do this for the memory operands of other
40310 	 builtin functions.  */
40311       op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
40312 
40313       if (!insn_data[icode].operand[2].predicate (op2, Pmode))
40314 	op2 = copy_to_mode_reg (Pmode, op2);
40315 
40316       if (!insn_data[icode].operand[3].predicate (op3, mode3))
40317 	{
40318 	  error ("the fourth argument must be scale 1, 2, 4, 8");
40319 	  return const0_rtx;
40320 	}
40321 
40322       if (!insn_data[icode].operand[4].predicate (op4, mode4))
40323 	{
40324 	  error ("incorrect hint operand");
40325 	  return const0_rtx;
40326 	}
40327 
40328       pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40329       if (! pat)
40330 	return const0_rtx;
40331 
40332       emit_insn (pat);
40333 
40334       return 0;
40335 
40336     case IX86_BUILTIN_XABORT:
40337       icode = CODE_FOR_xabort;
40338       arg0 = CALL_EXPR_ARG (exp, 0);
40339       op0 = expand_normal (arg0);
40340       mode0 = insn_data[icode].operand[0].mode;
40341       if (!insn_data[icode].operand[0].predicate (op0, mode0))
40342 	{
40343 	  error ("the argument to xabort must be an 8-bit immediate");
40344 	  return const0_rtx;
40345 	}
40346       emit_insn (gen_xabort (op0));
40347       return 0;
40348 
40349     default:
40350       break;
40351     }
40352 
40353   for (i = 0, d = bdesc_special_args;
40354        i < ARRAY_SIZE (bdesc_special_args);
40355        i++, d++)
40356     if (d->code == fcode)
40357       return ix86_expand_special_args_builtin (d, exp, target);
40358 
40359   for (i = 0, d = bdesc_args;
40360        i < ARRAY_SIZE (bdesc_args);
40361        i++, d++)
40362     if (d->code == fcode)
40363       switch (fcode)
40364 	{
40365 	case IX86_BUILTIN_FABSQ:
40366 	case IX86_BUILTIN_COPYSIGNQ:
40367 	  if (!TARGET_SSE)
40368 	    /* Emit a normal call if SSE isn't available.  */
40369 	    return expand_call (exp, target, ignore);
40370 	default:
40371 	  return ix86_expand_args_builtin (d, exp, target);
40372 	}
40373 
40374   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
40375     if (d->code == fcode)
40376       return ix86_expand_sse_comi (d, exp, target);
40377 
40378   for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
40379     if (d->code == fcode)
40380       return ix86_expand_round_builtin (d, exp, target);
40381 
40382   for (i = 0, d = bdesc_pcmpestr;
40383        i < ARRAY_SIZE (bdesc_pcmpestr);
40384        i++, d++)
40385     if (d->code == fcode)
40386       return ix86_expand_sse_pcmpestr (d, exp, target);
40387 
40388   for (i = 0, d = bdesc_pcmpistr;
40389        i < ARRAY_SIZE (bdesc_pcmpistr);
40390        i++, d++)
40391     if (d->code == fcode)
40392       return ix86_expand_sse_pcmpistr (d, exp, target);
40393 
40394   for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
40395     if (d->code == fcode)
40396       return ix86_expand_multi_arg_builtin (d->icode, exp, target,
40397 					    (enum ix86_builtin_func_type)
40398 					    d->flag, d->comparison);
40399 
40400   gcc_unreachable ();
40401 }
40402 
40403 /* This returns the target-specific builtin with code CODE if
40404    current_function_decl has visibility on this builtin, which is checked
40405    using isa flags.  Returns NULL_TREE otherwise.  */
40406 
40407 static tree ix86_get_builtin (enum ix86_builtins code)
40408 {
40409   struct cl_target_option *opts;
40410   tree target_tree = NULL_TREE;
40411 
40412   /* Determine the isa flags of current_function_decl.  */
40413 
40414   if (current_function_decl)
40415     target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
40416 
40417   if (target_tree == NULL)
40418     target_tree = target_option_default_node;
40419 
40420   opts = TREE_TARGET_OPTION (target_tree);
40421 
40422   if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
40423     return ix86_builtin_decl (code, true);
40424   else
40425     return NULL_TREE;
40426 }
40427 
40428 /* Return the function decl for the target specific builtin
40429    corresponding to the MPX builtin passed in FCODE.  */
40430 static tree
40431 ix86_builtin_mpx_function (unsigned fcode)
40432 {
40433   switch (fcode)
40434     {
40435     case BUILT_IN_CHKP_BNDMK:
40436       return ix86_builtins[IX86_BUILTIN_BNDMK];
40437 
40438     case BUILT_IN_CHKP_BNDSTX:
40439       return ix86_builtins[IX86_BUILTIN_BNDSTX];
40440 
40441     case BUILT_IN_CHKP_BNDLDX:
40442       return ix86_builtins[IX86_BUILTIN_BNDLDX];
40443 
40444     case BUILT_IN_CHKP_BNDCL:
40445       return ix86_builtins[IX86_BUILTIN_BNDCL];
40446 
40447     case BUILT_IN_CHKP_BNDCU:
40448       return ix86_builtins[IX86_BUILTIN_BNDCU];
40449 
40450     case BUILT_IN_CHKP_BNDRET:
40451       return ix86_builtins[IX86_BUILTIN_BNDRET];
40452 
40453     case BUILT_IN_CHKP_INTERSECT:
40454       return ix86_builtins[IX86_BUILTIN_BNDINT];
40455 
40456     case BUILT_IN_CHKP_NARROW:
40457       return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40458 
40459     case BUILT_IN_CHKP_SIZEOF:
40460       return ix86_builtins[IX86_BUILTIN_SIZEOF];
40461 
40462     case BUILT_IN_CHKP_EXTRACT_LOWER:
40463       return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40464 
40465     case BUILT_IN_CHKP_EXTRACT_UPPER:
40466       return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40467 
40468     default:
40469       return NULL_TREE;
40470     }
40471 
40472   gcc_unreachable ();
40473 }
40474 
40475 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40476 
40477    Return an address to be used to load/store bounds for the pointer
40478    passed in SLOT.
40479 
40480    SLOT_NO is an integer constant holding the number of a target
40481    dependent special slot to be used in case SLOT is not a memory.
40482 
40483    SPECIAL_BASE is a pointer to be used as the base of fake addresses
40484    for accessing special slots in the Bounds Table.  SPECIAL_BASE[-1],
40485    SPECIAL_BASE[-2] etc. will be used as fake pointer locations.  */
40486 
40487 static rtx
40488 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40489 {
40490   rtx addr = NULL;
40491 
40492   /* A NULL SLOT means we pass bounds for a pointer not passed to the
40493      function at all.  A register SLOT means we pass the pointer in a
40494      register.  In both cases bounds are passed via the Bounds Table.
40495      Since we do not have an actual pointer stored in memory, we have
40496      to use fake addresses to access the Bounds Table.  We start with
40497      (SPECIAL_BASE - sizeof (void *)) and decrease this address by the
40498      pointer size to get the addresses of the other slots.  */
40499   if (!slot || REG_P (slot))
40500     {
40501       gcc_assert (CONST_INT_P (slot_no));
40502       addr = plus_constant (Pmode, special_base,
40503 			    -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
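      /* E.g., with 64-bit pointers slot 0 yields SPECIAL_BASE - 8,
	 slot 1 yields SPECIAL_BASE - 16, and so on.  */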
40504     }
40505   /* If the pointer is passed in a memory then its address is used
40506      to access the Bounds Table.  */
40507   else if (MEM_P (slot))
40508     {
40509       addr = XEXP (slot, 0);
40510       if (!register_operand (addr, Pmode))
40511 	addr = copy_addr_to_reg (addr);
40512     }
40513   else
40514     gcc_unreachable ();
40515 
40516   return addr;
40517 }
40518 
40519 /* The expand pass uses this hook to load bounds for function parameter
40520    PTR passed in SLOT in case its bounds are not passed in a register.
40521 
40522    If SLOT is a memory, then bounds are loaded as for a regular
40523    pointer loaded from memory.  PTR may be NULL when SLOT is a memory;
40524    in that case the value of PTR (if required) may be loaded from SLOT.
40525 
40526    If SLOT is NULL or a register then SLOT_NO is an integer constant
40527    holding the number of the target dependent special slot which should
40528    be used to obtain bounds.
40529 
40530    Return loaded bounds.  */
40531 
40532 static rtx
40533 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40534 {
40535   rtx reg = gen_reg_rtx (BNDmode);
40536   rtx addr;
40537 
40538   /* Get the address used to access the Bounds Table.  Special slots
40539      start at the location of the return address of the current function.  */
40540   addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40541 
40542   /* Load the pointer value from memory if we don't have it.  */
40543   if (!ptr)
40544     {
40545       gcc_assert (MEM_P (slot));
40546       ptr = copy_addr_to_reg (slot);
40547     }
40548 
40549   emit_insn (BNDmode == BND64mode
40550 	     ? gen_bnd64_ldx (reg, addr, ptr)
40551 	     : gen_bnd32_ldx (reg, addr, ptr));
40552 
40553   return reg;
40554 }
40555 
40556 /* The expand pass uses this hook to store BOUNDS for call argument PTR
40557    passed in SLOT in case BOUNDS are not passed in a register.
40558 
40559    If SLOT is a memory, then BOUNDS are stored as for a regular
40560    pointer stored in memory.  PTR may be NULL when SLOT is a memory;
40561    in that case the value of PTR (if required) may be loaded from SLOT.
40562 
40563    If SLOT is NULL or a register then SLOT_NO is an integer constant
40564    holding the number of the target dependent special slot which should
40565    be used to store BOUNDS.  */
40566 
40567 static void
40568 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40569 {
40570   rtx addr;
40571 
40572   /* Get the address used to access the Bounds Table.  Special slots
40573      start at the location of the return address of a called function.  */
40574   addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40575 
40576   /* Load the pointer value from memory if we don't have it.  */
40577   if (!ptr)
40578     {
40579       gcc_assert (MEM_P (slot));
40580       ptr = copy_addr_to_reg (slot);
40581     }
40582 
40583   gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40584   if (!register_operand (bounds, BNDmode))
40585     bounds = copy_to_mode_reg (BNDmode, bounds);
40586 
40587   emit_insn (BNDmode == BND64mode
40588 	     ? gen_bnd64_stx (addr, ptr, bounds)
40589 	     : gen_bnd32_stx (addr, ptr, bounds));
40590 }
40591 
40592 /* Load and return bounds returned by function in SLOT.  */
40593 
40594 static rtx
40595 ix86_load_returned_bounds (rtx slot)
40596 {
40597   rtx res;
40598 
40599   gcc_assert (REG_P (slot));
40600   res = gen_reg_rtx (BNDmode);
40601   emit_move_insn (res, slot);
40602 
40603   return res;
40604 }
40605 
40606 /* Store BOUNDS returned by function into SLOT.  */
40607 
40608 static void
40609 ix86_store_returned_bounds (rtx slot, rtx bounds)
40610 {
40611   gcc_assert (REG_P (slot));
40612   emit_move_insn (slot, bounds);
40613 }
40614 
40615 /* Returns a function decl for a vectorized version of the builtin
40616    function FNDECL with result vector type TYPE_OUT and argument vector
40617    type TYPE_IN, or NULL_TREE if no such version is available.  */
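/* For example, a request to vectorize BUILT_IN_SQRT with two DFmode
   lanes in and out resolves to IX86_BUILTIN_SQRTPD in the switch
   below.  */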
40618 
40619 static tree
40620 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40621 				  tree type_in)
40622 {
40623   machine_mode in_mode, out_mode;
40624   int in_n, out_n;
40625   enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
40626 
40627   if (TREE_CODE (type_out) != VECTOR_TYPE
40628       || TREE_CODE (type_in) != VECTOR_TYPE
40629       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40630     return NULL_TREE;
40631 
40632   out_mode = TYPE_MODE (TREE_TYPE (type_out));
40633   out_n = TYPE_VECTOR_SUBPARTS (type_out);
40634   in_mode = TYPE_MODE (TREE_TYPE (type_in));
40635   in_n = TYPE_VECTOR_SUBPARTS (type_in);
40636 
40637   switch (fn)
40638     {
40639     case BUILT_IN_SQRT:
40640       if (out_mode == DFmode && in_mode == DFmode)
40641 	{
40642 	  if (out_n == 2 && in_n == 2)
40643 	    return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40644 	  else if (out_n == 4 && in_n == 4)
40645 	    return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40646 	  else if (out_n == 8 && in_n == 8)
40647 	    return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40648 	}
40649       break;
40650 
40651     case BUILT_IN_EXP2F:
40652       if (out_mode == SFmode && in_mode == SFmode)
40653 	{
40654 	  if (out_n == 16 && in_n == 16)
40655 	    return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40656 	}
40657       break;
40658 
40659     case BUILT_IN_SQRTF:
40660       if (out_mode == SFmode && in_mode == SFmode)
40661 	{
40662 	  if (out_n == 4 && in_n == 4)
40663 	    return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40664 	  else if (out_n == 8 && in_n == 8)
40665 	    return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40666 	  else if (out_n == 16 && in_n == 16)
40667 	    return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40668 	}
40669       break;
40670 
40671     case BUILT_IN_IFLOOR:
40672     case BUILT_IN_LFLOOR:
40673     case BUILT_IN_LLFLOOR:
40674       /* The round insn does not trap on denormals.  */
40675       if (flag_trapping_math || !TARGET_ROUND)
40676 	break;
40677 
40678       if (out_mode == SImode && in_mode == DFmode)
40679 	{
40680 	  if (out_n == 4 && in_n == 2)
40681 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40682 	  else if (out_n == 8 && in_n == 4)
40683 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40684 	  else if (out_n == 16 && in_n == 8)
40685 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40686 	}
40687       break;
40688 
40689     case BUILT_IN_IFLOORF:
40690     case BUILT_IN_LFLOORF:
40691     case BUILT_IN_LLFLOORF:
40692       /* The round insn does not trap on denormals.  */
40693       if (flag_trapping_math || !TARGET_ROUND)
40694 	break;
40695 
40696       if (out_mode == SImode && in_mode == SFmode)
40697 	{
40698 	  if (out_n == 4 && in_n == 4)
40699 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40700 	  else if (out_n == 8 && in_n == 8)
40701 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40702 	}
40703       break;
40704 
40705     case BUILT_IN_ICEIL:
40706     case BUILT_IN_LCEIL:
40707     case BUILT_IN_LLCEIL:
40708       /* The round insn does not trap on denormals.  */
40709       if (flag_trapping_math || !TARGET_ROUND)
40710 	break;
40711 
40712       if (out_mode == SImode && in_mode == DFmode)
40713 	{
40714 	  if (out_n == 4 && in_n == 2)
40715 	    return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40716 	  else if (out_n == 8 && in_n == 4)
40717 	    return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40718 	  else if (out_n == 16 && in_n == 8)
40719 	    return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40720 	}
40721       break;
40722 
40723     case BUILT_IN_ICEILF:
40724     case BUILT_IN_LCEILF:
40725     case BUILT_IN_LLCEILF:
40726       /* The round insn does not trap on denormals.  */
40727       if (flag_trapping_math || !TARGET_ROUND)
40728 	break;
40729 
40730       if (out_mode == SImode && in_mode == SFmode)
40731 	{
40732 	  if (out_n == 4 && in_n == 4)
40733 	    return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40734 	  else if (out_n == 8 && in_n == 8)
40735 	    return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40736 	}
40737       break;
40738 
40739     case BUILT_IN_IRINT:
40740     case BUILT_IN_LRINT:
40741     case BUILT_IN_LLRINT:
40742       if (out_mode == SImode && in_mode == DFmode)
40743 	{
40744 	  if (out_n == 4 && in_n == 2)
40745 	    return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40746 	  else if (out_n == 8 && in_n == 4)
40747 	    return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40748 	}
40749       break;
40750 
40751     case BUILT_IN_IRINTF:
40752     case BUILT_IN_LRINTF:
40753     case BUILT_IN_LLRINTF:
40754       if (out_mode == SImode && in_mode == SFmode)
40755 	{
40756 	  if (out_n == 4 && in_n == 4)
40757 	    return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40758 	  else if (out_n == 8 && in_n == 8)
40759 	    return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40760 	}
40761       break;
40762 
40763     case BUILT_IN_IROUND:
40764     case BUILT_IN_LROUND:
40765     case BUILT_IN_LLROUND:
40766       /* The round insn does not trap on denormals.  */
40767       if (flag_trapping_math || !TARGET_ROUND)
40768 	break;
40769 
40770       if (out_mode == SImode && in_mode == DFmode)
40771 	{
40772 	  if (out_n == 4 && in_n == 2)
40773 	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40774 	  else if (out_n == 8 && in_n == 4)
40775 	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40776 	  else if (out_n == 16 && in_n == 8)
40777 	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40778 	}
40779       break;
40780 
40781     case BUILT_IN_IROUNDF:
40782     case BUILT_IN_LROUNDF:
40783     case BUILT_IN_LLROUNDF:
40784       /* The round insn does not trap on denormals.  */
40785       if (flag_trapping_math || !TARGET_ROUND)
40786 	break;
40787 
40788       if (out_mode == SImode && in_mode == SFmode)
40789 	{
40790 	  if (out_n == 4 && in_n == 4)
40791 	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40792 	  else if (out_n == 8 && in_n == 8)
40793 	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40794 	}
40795       break;
40796 
40797     case BUILT_IN_COPYSIGN:
40798       if (out_mode == DFmode && in_mode == DFmode)
40799 	{
40800 	  if (out_n == 2 && in_n == 2)
40801 	    return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40802 	  else if (out_n == 4 && in_n == 4)
40803 	    return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40804 	  else if (out_n == 8 && in_n == 8)
40805 	    return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40806 	}
40807       break;
40808 
40809     case BUILT_IN_COPYSIGNF:
40810       if (out_mode == SFmode && in_mode == SFmode)
40811 	{
40812 	  if (out_n == 4 && in_n == 4)
40813 	    return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40814 	  else if (out_n == 8 && in_n == 8)
40815 	    return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40816 	  else if (out_n == 16 && in_n == 16)
40817 	    return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40818 	}
40819       break;
40820 
40821     case BUILT_IN_FLOOR:
40822       /* The round insn does not trap on denormals.  */
40823       if (flag_trapping_math || !TARGET_ROUND)
40824 	break;
40825 
40826       if (out_mode == DFmode && in_mode == DFmode)
40827 	{
40828 	  if (out_n == 2 && in_n == 2)
40829 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40830 	  else if (out_n == 4 && in_n == 4)
40831 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40832 	}
40833       break;
40834 
40835     case BUILT_IN_FLOORF:
40836       /* The round insn does not trap on denormals.  */
40837       if (flag_trapping_math || !TARGET_ROUND)
40838 	break;
40839 
40840       if (out_mode == SFmode && in_mode == SFmode)
40841 	{
40842 	  if (out_n == 4 && in_n == 4)
40843 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40844 	  else if (out_n == 8 && in_n == 8)
40845 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40846 	}
40847       break;
40848 
40849     case BUILT_IN_CEIL:
40850       /* The round insn does not trap on denormals.  */
40851       if (flag_trapping_math || !TARGET_ROUND)
40852 	break;
40853 
40854       if (out_mode == DFmode && in_mode == DFmode)
40855 	{
40856 	  if (out_n == 2 && in_n == 2)
40857 	    return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40858 	  else if (out_n == 4 && in_n == 4)
40859 	    return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40860 	}
40861       break;
40862 
40863     case BUILT_IN_CEILF:
40864       /* The round insn does not trap on denormals.  */
40865       if (flag_trapping_math || !TARGET_ROUND)
40866 	break;
40867 
40868       if (out_mode == SFmode && in_mode == SFmode)
40869 	{
40870 	  if (out_n == 4 && in_n == 4)
40871 	    return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40872 	  else if (out_n == 8 && in_n == 8)
40873 	    return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40874 	}
40875       break;
40876 
40877     case BUILT_IN_TRUNC:
40878       /* The round insn does not trap on denormals.  */
40879       if (flag_trapping_math || !TARGET_ROUND)
40880 	break;
40881 
40882       if (out_mode == DFmode && in_mode == DFmode)
40883 	{
40884 	  if (out_n == 2 && in_n == 2)
40885 	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40886 	  else if (out_n == 4 && in_n == 4)
40887 	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40888 	}
40889       break;
40890 
40891     case BUILT_IN_TRUNCF:
40892       /* The round insn does not trap on denormals.  */
40893       if (flag_trapping_math || !TARGET_ROUND)
40894 	break;
40895 
40896       if (out_mode == SFmode && in_mode == SFmode)
40897 	{
40898 	  if (out_n == 4 && in_n == 4)
40899 	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40900 	  else if (out_n == 8 && in_n == 8)
40901 	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40902 	}
40903       break;
40904 
40905     case BUILT_IN_RINT:
40906       /* The round insn does not trap on denormals.  */
40907       if (flag_trapping_math || !TARGET_ROUND)
40908 	break;
40909 
40910       if (out_mode == DFmode && in_mode == DFmode)
40911 	{
40912 	  if (out_n == 2 && in_n == 2)
40913 	    return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40914 	  else if (out_n == 4 && in_n == 4)
40915 	    return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40916 	}
40917       break;
40918 
40919     case BUILT_IN_RINTF:
40920       /* The round insn does not trap on denormals.  */
40921       if (flag_trapping_math || !TARGET_ROUND)
40922 	break;
40923 
40924       if (out_mode == SFmode && in_mode == SFmode)
40925 	{
40926 	  if (out_n == 4 && in_n == 4)
40927 	    return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40928 	  else if (out_n == 8 && in_n == 8)
40929 	    return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40930 	}
40931       break;
40932 
40933     case BUILT_IN_ROUND:
40934       /* The round insn does not trap on denormals.  */
40935       if (flag_trapping_math || !TARGET_ROUND)
40936 	break;
40937 
40938       if (out_mode == DFmode && in_mode == DFmode)
40939 	{
40940 	  if (out_n == 2 && in_n == 2)
40941 	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40942 	  else if (out_n == 4 && in_n == 4)
40943 	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40944 	}
40945       break;
40946 
40947     case BUILT_IN_ROUNDF:
40948       /* The round insn does not trap on denormals.  */
40949       if (flag_trapping_math || !TARGET_ROUND)
40950 	break;
40951 
40952       if (out_mode == SFmode && in_mode == SFmode)
40953 	{
40954 	  if (out_n == 4 && in_n == 4)
40955 	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40956 	  else if (out_n == 8 && in_n == 8)
40957 	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40958 	}
40959       break;
40960 
40961     case BUILT_IN_FMA:
40962       if (out_mode == DFmode && in_mode == DFmode)
40963 	{
40964 	  if (out_n == 2 && in_n == 2)
40965 	    return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40966 	  if (out_n == 4 && in_n == 4)
40967 	    return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40968 	}
40969       break;
40970 
40971     case BUILT_IN_FMAF:
40972       if (out_mode == SFmode && in_mode == SFmode)
40973 	{
40974 	  if (out_n == 4 && in_n == 4)
40975 	    return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40976 	  if (out_n == 8 && in_n == 8)
40977 	    return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40978 	}
40979       break;
40980 
40981     default:
40982       break;
40983     }
40984 
40985   /* Dispatch to a handler for a vectorization library.  */
40986   if (ix86_veclib_handler)
40987     return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40988 				type_in);
40989 
40990   return NULL_TREE;
40991 }
40992 
40993 /* Handler for an SVML-style interface to
40994    a library with vectorized intrinsics.  */
40995 
40996 static tree
40997 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40998 {
40999   char name[20];
41000   tree fntype, new_fndecl, args;
41001   unsigned arity;
41002   const char *bname;
41003   machine_mode el_mode, in_mode;
41004   int n, in_n;
41005 
41006   /* SVML is suitable for unsafe math only.  */
41007   if (!flag_unsafe_math_optimizations)
41008     return NULL_TREE;
41009 
41010   el_mode = TYPE_MODE (TREE_TYPE (type_out));
41011   n = TYPE_VECTOR_SUBPARTS (type_out);
41012   in_mode = TYPE_MODE (TREE_TYPE (type_in));
41013   in_n = TYPE_VECTOR_SUBPARTS (type_in);
41014   if (el_mode != in_mode
41015       || n != in_n)
41016     return NULL_TREE;
41017 
41018   switch (fn)
41019     {
41020     case BUILT_IN_EXP:
41021     case BUILT_IN_LOG:
41022     case BUILT_IN_LOG10:
41023     case BUILT_IN_POW:
41024     case BUILT_IN_TANH:
41025     case BUILT_IN_TAN:
41026     case BUILT_IN_ATAN:
41027     case BUILT_IN_ATAN2:
41028     case BUILT_IN_ATANH:
41029     case BUILT_IN_CBRT:
41030     case BUILT_IN_SINH:
41031     case BUILT_IN_SIN:
41032     case BUILT_IN_ASINH:
41033     case BUILT_IN_ASIN:
41034     case BUILT_IN_COSH:
41035     case BUILT_IN_COS:
41036     case BUILT_IN_ACOSH:
41037     case BUILT_IN_ACOS:
41038       if (el_mode != DFmode || n != 2)
41039 	return NULL_TREE;
41040       break;
41041 
41042     case BUILT_IN_EXPF:
41043     case BUILT_IN_LOGF:
41044     case BUILT_IN_LOG10F:
41045     case BUILT_IN_POWF:
41046     case BUILT_IN_TANHF:
41047     case BUILT_IN_TANF:
41048     case BUILT_IN_ATANF:
41049     case BUILT_IN_ATAN2F:
41050     case BUILT_IN_ATANHF:
41051     case BUILT_IN_CBRTF:
41052     case BUILT_IN_SINHF:
41053     case BUILT_IN_SINF:
41054     case BUILT_IN_ASINHF:
41055     case BUILT_IN_ASINF:
41056     case BUILT_IN_COSHF:
41057     case BUILT_IN_COSF:
41058     case BUILT_IN_ACOSHF:
41059     case BUILT_IN_ACOSF:
41060       if (el_mode != SFmode || n != 4)
41061 	return NULL_TREE;
41062       break;
41063 
41064     default:
41065       return NULL_TREE;
41066     }
41067 
41068   bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
41069 
41070   if (fn == BUILT_IN_LOGF)
41071     strcpy (name, "vmlsLn4");
41072   else if (fn == BUILT_IN_LOG)
41073     strcpy (name, "vmldLn2");
41074   else if (n == 4)
41075     {
41076       sprintf (name, "vmls%s", bname+10);
41077       name[strlen (name)-1] = '4';
41078     }
41079   else
41080     sprintf (name, "vmld%s2", bname+10);
41081 
41082   /* Convert to uppercase. */
41083   name[4] &= ~0x20;
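  /* Worked example (illustrative, derived from the mangling code above):
     for BUILT_IN_SINF, bname is "__builtin_sinf", so bname+10 is "sinf";
     "vmls" + "sinf" gives "vmlssinf", the trailing character is replaced
     by '4' and name[4] is uppercased, yielding "vmlsSin4".  BUILT_IN_SIN
     similarly yields "vmldSin2".  The decl built below is then roughly
     equivalent to a declaration such as:  __m128 vmlsSin4 (__m128);  */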
41084 
41085   arity = 0;
41086   for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
41087        args;
41088        args = TREE_CHAIN (args))
41089     arity++;
41090 
41091   if (arity == 1)
41092     fntype = build_function_type_list (type_out, type_in, NULL);
41093   else
41094     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
41095 
41096   /* Build a function declaration for the vectorized function.  */
41097   new_fndecl = build_decl (BUILTINS_LOCATION,
41098 			   FUNCTION_DECL, get_identifier (name), fntype);
41099   TREE_PUBLIC (new_fndecl) = 1;
41100   DECL_EXTERNAL (new_fndecl) = 1;
41101   DECL_IS_NOVOPS (new_fndecl) = 1;
41102   TREE_READONLY (new_fndecl) = 1;
41103 
41104   return new_fndecl;
41105 }
41106 
41107 /* Handler for an ACML-style interface to
41108    a library with vectorized intrinsics.  */
41109 
41110 static tree
41111 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
41112 {
41113   char name[20] = "__vr.._";
41114   tree fntype, new_fndecl, args;
41115   unsigned arity;
41116   const char *bname;
41117   machine_mode el_mode, in_mode;
41118   int n, in_n;
41119 
41120   /* ACML is 64-bit only and suitable for unsafe math only, as it
41121      does not correctly support parts of IEEE semantics, such as
41122      denormals, with the required precision.  */
41123   if (!TARGET_64BIT
41124       || !flag_unsafe_math_optimizations)
41125     return NULL_TREE;
41126 
41127   el_mode = TYPE_MODE (TREE_TYPE (type_out));
41128   n = TYPE_VECTOR_SUBPARTS (type_out);
41129   in_mode = TYPE_MODE (TREE_TYPE (type_in));
41130   in_n = TYPE_VECTOR_SUBPARTS (type_in);
41131   if (el_mode != in_mode
41132       || n != in_n)
41133     return NULL_TREE;
41134 
41135   switch (fn)
41136     {
41137     case BUILT_IN_SIN:
41138     case BUILT_IN_COS:
41139     case BUILT_IN_EXP:
41140     case BUILT_IN_LOG:
41141     case BUILT_IN_LOG2:
41142     case BUILT_IN_LOG10:
41143       name[4] = 'd';
41144       name[5] = '2';
41145       if (el_mode != DFmode
41146 	  || n != 2)
41147 	return NULL_TREE;
41148       break;
41149 
41150     case BUILT_IN_SINF:
41151     case BUILT_IN_COSF:
41152     case BUILT_IN_EXPF:
41153     case BUILT_IN_POWF:
41154     case BUILT_IN_LOGF:
41155     case BUILT_IN_LOG2F:
41156     case BUILT_IN_LOG10F:
41157       name[4] = 's';
41158       name[5] = '4';
41159       if (el_mode != SFmode
41160 	  || n != 4)
41161 	return NULL_TREE;
41162       break;
41163 
41164     default:
41165       return NULL_TREE;
41166     }
41167 
41168   bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
41169   sprintf (name + 7, "%s", bname+10);
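  /* Worked example (illustrative): for BUILT_IN_SIN the template
     "__vr.._" has become "__vrd2_" above, and bname+10 ("sin") is
     appended, giving "__vrd2_sin"; BUILT_IN_SINF yields "__vrs4_sinf".  */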
41170 
41171   arity = 0;
41172   for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
41173        args;
41174        args = TREE_CHAIN (args))
41175     arity++;
41176 
41177   if (arity == 1)
41178     fntype = build_function_type_list (type_out, type_in, NULL);
41179   else
41180     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
41181 
41182   /* Build a function declaration for the vectorized function.  */
41183   new_fndecl = build_decl (BUILTINS_LOCATION,
41184 			   FUNCTION_DECL, get_identifier (name), fntype);
41185   TREE_PUBLIC (new_fndecl) = 1;
41186   DECL_EXTERNAL (new_fndecl) = 1;
41187   DECL_IS_NOVOPS (new_fndecl) = 1;
41188   TREE_READONLY (new_fndecl) = 1;
41189 
41190   return new_fndecl;
41191 }
41192 
41193 /* Returns a decl of a function that implements gather load with
41194    memory type MEM_VECTYPE and index type INDEX_TYPE and SCALE.
41195    Return NULL_TREE if it is not available.  */
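/* For example, with AVX2 a gather of V4DFmode data indexed by SImode
   offsets maps to IX86_BUILTIN_GATHERALTSIV4DF (ultimately AVX2's
   vgatherdpd), with a scale of 1, 2, 4 or 8.  */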
41196 
41197 static tree
41198 ix86_vectorize_builtin_gather (const_tree mem_vectype,
41199 			       const_tree index_type, int scale)
41200 {
41201   bool si;
41202   enum ix86_builtins code;
41203 
41204   if (! TARGET_AVX2)
41205     return NULL_TREE;
41206 
41207   if ((TREE_CODE (index_type) != INTEGER_TYPE
41208        && !POINTER_TYPE_P (index_type))
41209       || (TYPE_MODE (index_type) != SImode
41210 	  && TYPE_MODE (index_type) != DImode))
41211     return NULL_TREE;
41212 
41213   if (TYPE_PRECISION (index_type) > POINTER_SIZE)
41214     return NULL_TREE;
41215 
41216   /* v*gather* insn sign extends index to pointer mode.  */
41217   if (TYPE_PRECISION (index_type) < POINTER_SIZE
41218       && TYPE_UNSIGNED (index_type))
41219     return NULL_TREE;
41220 
41221   if (scale <= 0
41222       || scale > 8
41223       || (scale & (scale - 1)) != 0)
41224     return NULL_TREE;
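  /* The test above accepts exactly the scales the addressing hardware
     can encode: 1, 2, 4 and 8.  For a power of two, scale & (scale - 1)
     clears the only set bit, e.g. 8 & 7 == 0, whereas 6 & 5 == 4, so 6
     is rejected.  */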
41225 
41226   si = TYPE_MODE (index_type) == SImode;
41227   switch (TYPE_MODE (mem_vectype))
41228     {
41229     case V2DFmode:
41230       if (TARGET_AVX512VL)
41231 	code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
41232       else
41233 	code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
41234       break;
41235     case V4DFmode:
41236       if (TARGET_AVX512VL)
41237 	code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
41238       else
41239 	code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
41240       break;
41241     case V2DImode:
41242       if (TARGET_AVX512VL)
41243 	code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
41244       else
41245 	code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
41246       break;
41247     case V4DImode:
41248       if (TARGET_AVX512VL)
41249 	code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
41250       else
41251 	code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
41252       break;
41253     case V4SFmode:
41254       if (TARGET_AVX512VL)
41255 	code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
41256       else
41257 	code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
41258       break;
41259     case V8SFmode:
41260       if (TARGET_AVX512VL)
41261 	code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
41262       else
41263 	code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
41264       break;
41265     case V4SImode:
41266       if (TARGET_AVX512VL)
41267 	code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
41268       else
41269 	code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
41270       break;
41271     case V8SImode:
41272       if (TARGET_AVX512VL)
41273 	code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
41274       else
41275 	code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
41276       break;
41277     case V8DFmode:
41278       if (TARGET_AVX512F)
41279 	code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
41280       else
41281 	return NULL_TREE;
41282       break;
41283     case V8DImode:
41284       if (TARGET_AVX512F)
41285 	code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
41286       else
41287 	return NULL_TREE;
41288       break;
41289     case V16SFmode:
41290       if (TARGET_AVX512F)
41291 	code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
41292       else
41293 	return NULL_TREE;
41294       break;
41295     case V16SImode:
41296       if (TARGET_AVX512F)
41297 	code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
41298       else
41299 	return NULL_TREE;
41300       break;
41301     default:
41302       return NULL_TREE;
41303     }
41304 
41305   return ix86_get_builtin (code);
41306 }
41307 
41308 /* Returns the decl of a target-specific builtin that implements the
41309    reciprocal of the function, or NULL_TREE if not available.  */
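/* Note: the _NR suffix below presumably stands for the Newton-Raphson
   refined variants, in which the hardware rsqrtps estimate (only about
   12 bits of precision) is followed by a refinement step.  This is why
   the transformation is gated on -funsafe-math-optimizations, among
   other flags.  */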
41310 
41311 static tree
41312 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
41313 {
41314   if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
41315 	 && flag_finite_math_only && !flag_trapping_math
41316 	 && flag_unsafe_math_optimizations))
41317     return NULL_TREE;
41318 
41319   if (md_fn)
41320     /* Machine dependent builtins.  */
41321     switch (fn)
41322       {
41323 	/* Vectorized version of sqrt to rsqrt conversion.  */
41324       case IX86_BUILTIN_SQRTPS_NR:
41325 	return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
41326 
41327       case IX86_BUILTIN_SQRTPS_NR256:
41328 	return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
41329 
41330       default:
41331 	return NULL_TREE;
41332       }
41333   else
41334     /* Normal builtins.  */
41335     switch (fn)
41336       {
41337 	/* Sqrt to rsqrt conversion.  */
41338       case BUILT_IN_SQRTF:
41339 	return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
41340 
41341       default:
41342 	return NULL_TREE;
41343       }
41344 }
41345 
41346 /* Helper for avx_vpermilps256_operand et al.  This is also used by
41347    the expansion functions to turn the parallel back into a mask.
41348    The return value is 0 for no match and the imm8+1 for a match.  */
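/* Worked example (illustrative): for V4SFmode the parallel [0 3 2 1]
   encodes two selector bits per element, so
   mask = 0 | (3 << 2) | (2 << 4) | (1 << 6) = 0x6c,
   and the return value is 0x6d, i.e. imm8 0x6c plus one.  */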
41349 
41350 int
41351 avx_vpermilp_parallel (rtx par, machine_mode mode)
41352 {
41353   unsigned i, nelt = GET_MODE_NUNITS (mode);
41354   unsigned mask = 0;
41355   unsigned char ipar[16] = {};  /* Silence -Wuninitialized warning.  */
41356 
41357   if (XVECLEN (par, 0) != (int) nelt)
41358     return 0;
41359 
41360   /* Validate that all of the elements are constants, and not totally
41361      out of range.  Copy the data into an integral array to make the
41362      subsequent checks easier.  */
41363   for (i = 0; i < nelt; ++i)
41364     {
41365       rtx er = XVECEXP (par, 0, i);
41366       unsigned HOST_WIDE_INT ei;
41367 
41368       if (!CONST_INT_P (er))
41369 	return 0;
41370       ei = INTVAL (er);
41371       if (ei >= nelt)
41372 	return 0;
41373       ipar[i] = ei;
41374     }
41375 
41376   switch (mode)
41377     {
41378     case V8DFmode:
41379       /* In the 512-bit DFmode case, we can only move elements within
41380          a 128-bit lane.  First fill the second part of the mask,
41381 	 then fallthru.  */
41382       for (i = 4; i < 6; ++i)
41383 	{
41384 	  if (ipar[i] < 4 || ipar[i] >= 6)
41385 	    return 0;
41386 	  mask |= (ipar[i] - 4) << i;
41387 	}
41388       for (i = 6; i < 8; ++i)
41389 	{
41390 	  if (ipar[i] < 6)
41391 	    return 0;
41392 	  mask |= (ipar[i] - 6) << i;
41393 	}
41394       /* FALLTHRU */
41395 
41396     case V4DFmode:
41397       /* In the 256-bit DFmode case, we can only move elements within
41398          a 128-bit lane.  */
41399       for (i = 0; i < 2; ++i)
41400 	{
41401 	  if (ipar[i] >= 2)
41402 	    return 0;
41403 	  mask |= ipar[i] << i;
41404 	}
41405       for (i = 2; i < 4; ++i)
41406 	{
41407 	  if (ipar[i] < 2)
41408 	    return 0;
41409 	  mask |= (ipar[i] - 2) << i;
41410 	}
41411       break;
41412 
41413     case V16SFmode:
41414       /* In the 512-bit SFmode case, the permutation in the upper 256
41415 	 bits must mirror the permutation in the lower 256 bits.  */
41416       for (i = 0; i < 8; ++i)
41417 	if (ipar[i] + 8 != ipar[i + 8])
41418 	  return 0;
41419       /* FALLTHRU */
41420 
41421     case V8SFmode:
41422       /* In the 256-bit SFmode case, we have full freedom of
41423          movement within the low 128-bit lane, but the high 128-bit
41424          lane must mirror the exact same pattern.  */
41425       for (i = 0; i < 4; ++i)
41426 	if (ipar[i] + 4 != ipar[i + 4])
41427 	  return 0;
41428       nelt = 4;
41429       /* FALLTHRU */
41430 
41431     case V2DFmode:
41432     case V4SFmode:
41433       /* In the 128-bit case, we have full freedom in the placement
41434 	 of the elements from the source operand.  */
41435       for (i = 0; i < nelt; ++i)
41436 	mask |= ipar[i] << (i * (nelt / 2));
41437       break;
41438 
41439     default:
41440       gcc_unreachable ();
41441     }
41442 
41443   /* Make sure success has a non-zero value by adding one.  */
41444   return mask + 1;
41445 }
41446 
41447 /* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
41448    the expansion functions to turn the parallel back into a mask.
41449    The return value is 0 for no match and the imm8+1 for a match.  */
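/* Worked example (illustrative): for V4DFmode the parallel [2 3 4 5]
   selects two consecutive 128-bit halves; the low half starts at element
   2 (the high lane of the first source) and the high half at element 4
   (the low lane of the second source), so mask = 1 | (2 << 4) = 0x21
   and the return value is 0x22.  */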
41450 
41451 int
41452 avx_vperm2f128_parallel (rtx par, machine_mode mode)
41453 {
41454   unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41455   unsigned mask = 0;
41456   unsigned char ipar[8] = {};  /* Silence -Wuninitialized warning.  */
41457 
41458   if (XVECLEN (par, 0) != (int) nelt)
41459     return 0;
41460 
41461   /* Validate that all of the elements are constants, and not totally
41462      out of range.  Copy the data into an integral array to make the
41463      subsequent checks easier.  */
41464   for (i = 0; i < nelt; ++i)
41465     {
41466       rtx er = XVECEXP (par, 0, i);
41467       unsigned HOST_WIDE_INT ei;
41468 
41469       if (!CONST_INT_P (er))
41470 	return 0;
41471       ei = INTVAL (er);
41472       if (ei >= 2 * nelt)
41473 	return 0;
41474       ipar[i] = ei;
41475     }
41476 
41477   /* Validate that the halves of the permute are halves.  */
41478   /* Validate that both halves of the permute consist of consecutive
41479      elements.  */
41479     if (ipar[i] + 1 != ipar[i + 1])
41480       return 0;
41481   for (i = nelt2; i < nelt - 1; ++i)
41482     if (ipar[i] + 1 != ipar[i + 1])
41483       return 0;
41484 
41485   /* Reconstruct the mask.  */
41486   for (i = 0; i < 2; ++i)
41487     {
41488       unsigned e = ipar[i * nelt2];
41489       if (e % nelt2)
41490 	return 0;
41491       e /= nelt2;
41492       mask |= e << (i * 4);
41493     }
41494 
41495   /* Make sure success has a non-zero value by adding one.  */
41496   return mask + 1;
41497 }
41498 
41499 /* Return a register priority for hard reg REGNO.  */
41500 static int
41501 ix86_register_priority (int hard_regno)
41502 {
41503   /* ebp and r13 as the base always want a displacement, and r12 as
41504      the base always wants an index.  So discourage their use in an
41505      address.  */
41506   if (hard_regno == R12_REG || hard_regno == R13_REG)
41507     return 0;
41508   if (hard_regno == BP_REG)
41509     return 1;
41510   /* New x86-64 int registers result in bigger code size.  Discourage
41511      them.  */
41512   if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41513     return 2;
41514   /* New x86-64 SSE registers result in bigger code size.  Discourage
41515      them.  */
41516   if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41517     return 2;
41518   /* Usage of AX register results in smaller code.  Prefer it.  */
41519   if (hard_regno == AX_REG)
41520     return 4;
41521   return 3;
41522 }
41523 
41524 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41525 
41526    Put float CONST_DOUBLE in the constant pool instead of fp regs.
41527    QImode must go into class Q_REGS.
41528    Narrow ALL_REGS to GENERAL_REGS.  This allows movsf and
41529    movdf to do mem-to-mem moves through integer regs.  */
41530 
41531 static reg_class_t
41532 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41533 {
41534   machine_mode mode = GET_MODE (x);
41535 
41536   /* We're only allowed to return a subclass of CLASS.  Many of the
41537      following checks fail for NO_REGS, so eliminate that early.  */
41538   if (regclass == NO_REGS)
41539     return NO_REGS;
41540 
41541   /* All classes can load zeros.  */
41542   if (x == CONST0_RTX (mode))
41543     return regclass;
41544 
41545   /* Force constants into memory if we are loading a (nonzero) constant into
41546      an MMX, SSE or MASK register.  This is because there are no MMX/SSE/MASK
41547      instructions to load from a constant.  */
41548   if (CONSTANT_P (x)
41549       && (MAYBE_MMX_CLASS_P (regclass)
41550 	  || MAYBE_SSE_CLASS_P (regclass)
41551 	  || MAYBE_MASK_CLASS_P (regclass)))
41552     return NO_REGS;
41553 
41554   /* Prefer SSE regs only, if we can use them for math.  */
41555   if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41556     return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41557 
41558   /* Floating-point constants need more complex checks.  */
41559   if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
41560     {
41561       /* General regs can load everything.  */
41562       if (reg_class_subset_p (regclass, GENERAL_REGS))
41563         return regclass;
41564 
41565       /* Floats can load 0 and 1 plus some others.  Note that we eliminated
41566 	 zero above.  We only want to wind up preferring 80387 registers if
41567 	 we plan on doing computation with them.  */
41568       if (TARGET_80387
41569 	  && standard_80387_constant_p (x) > 0)
41570 	{
41571 	  /* Limit class to non-sse.  */
41572 	  if (regclass == FLOAT_SSE_REGS)
41573 	    return FLOAT_REGS;
41574 	  if (regclass == FP_TOP_SSE_REGS)
41575 	    return FP_TOP_REG;
41576 	  if (regclass == FP_SECOND_SSE_REGS)
41577 	    return FP_SECOND_REG;
41578 	  if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41579 	    return regclass;
41580 	}
41581 
41582       return NO_REGS;
41583     }
41584 
41585   /* Generally when we see PLUS here, it's the function invariant
41586      (plus soft-fp const_int), which can only be computed into general
41587      regs.  */
41588   if (GET_CODE (x) == PLUS)
41589     return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41590 
41591   /* QImode constants are easy to load, but non-constant QImode data
41592      must go into Q_REGS.  */
41593   if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41594     {
41595       if (reg_class_subset_p (regclass, Q_REGS))
41596 	return regclass;
41597       if (reg_class_subset_p (Q_REGS, regclass))
41598 	return Q_REGS;
41599       return NO_REGS;
41600     }
41601 
41602   return regclass;
41603 }
41604 
41605 /* Discourage putting floating-point values in SSE registers unless
41606    SSE math is being used, and likewise for the 387 registers.  */
41607 static reg_class_t
41608 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41609 {
41610   machine_mode mode = GET_MODE (x);
41611 
41612   /* Restrict the output reload class to the register bank that we are doing
41613      math on.  If we would like not to return a subset of CLASS, reject this
41614      alternative: if reload cannot do this, it will still use its choice.  */
41616   if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41617     return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41618 
41619   if (X87_FLOAT_MODE_P (mode))
41620     {
41621       if (regclass == FP_TOP_SSE_REGS)
41622 	return FP_TOP_REG;
41623       else if (regclass == FP_SECOND_SSE_REGS)
41624 	return FP_SECOND_REG;
41625       else
41626 	return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
41627     }
41628 
41629   return regclass;
41630 }
41631 
41632 static reg_class_t
41633 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41634 		       machine_mode mode, secondary_reload_info *sri)
41635 {
41636   /* Double-word spills from general registers to non-offsettable memory
41637      references (zero-extended addresses) require special handling.  */
41638   if (TARGET_64BIT
41639       && MEM_P (x)
41640       && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41641       && INTEGER_CLASS_P (rclass)
41642       && !offsettable_memref_p (x))
41643     {
41644       sri->icode = (in_p
41645 		    ? CODE_FOR_reload_noff_load
41646 		    : CODE_FOR_reload_noff_store);
41647       /* Add the cost of moving address to a temporary.  */
41648       sri->extra_cost = 1;
41649 
41650       return NO_REGS;
41651     }
41652 
41653   /* QImode spills from non-QI registers require an
41654      intermediate register on 32-bit targets.  */
41655   if (mode == QImode
41656       && (MAYBE_MASK_CLASS_P (rclass)
41657 	  || (!TARGET_64BIT && !in_p
41658 	      && INTEGER_CLASS_P (rclass)
41659 	      && MAYBE_NON_Q_CLASS_P (rclass))))
41660     {
41661       int regno;
41662 
41663       if (REG_P (x))
41664 	regno = REGNO (x);
41665       else
41666 	regno = -1;
41667 
41668       if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41669 	regno = true_regnum (x);
41670 
41671       /* Return Q_REGS if the operand is in memory.  */
41672       if (regno == -1)
41673 	return Q_REGS;
41674     }
41675 
41676   /* This condition handles corner case where an expression involving
41677      pointers gets vectorized.  We're trying to use the address of a
41678      stack slot as a vector initializer.
41679 
41680      (set (reg:V2DI 74 [ vect_cst_.2 ])
41681           (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41682 
41683      Eventually frame gets turned into sp+offset like this:
41684 
41685      (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41686           (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41687 	                               (const_int 392 [0x188]))))
41688 
41689      That later gets turned into:
41690 
41691      (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41692           (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41693 	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41694 
41695      We'll have the following reload recorded:
41696 
41697      Reload 0: reload_in (DI) =
41698            (plus:DI (reg/f:DI 7 sp)
41699             (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41700      reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41701      SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41702      reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41703      reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41704      reload_reg_rtx: (reg:V2DI 22 xmm1)
41705 
41706      Which isn't going to work since SSE instructions can't handle scalar
41707      additions.  Returning GENERAL_REGS forces the addition into integer
41708      register and reload can handle subsequent reloads without problems.  */
41709 
41710   if (in_p && GET_CODE (x) == PLUS
41711       && SSE_CLASS_P (rclass)
41712       && SCALAR_INT_MODE_P (mode))
41713     return GENERAL_REGS;
41714 
41715   return NO_REGS;
41716 }
41717 
41718 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */
41719 
41720 static bool
41721 ix86_class_likely_spilled_p (reg_class_t rclass)
41722 {
41723   switch (rclass)
41724     {
41725       case AREG:
41726       case DREG:
41727       case CREG:
41728       case BREG:
41729       case AD_REGS:
41730       case SIREG:
41731       case DIREG:
41732       case SSE_FIRST_REG:
41733       case FP_TOP_REG:
41734       case FP_SECOND_REG:
41735       case BND_REGS:
41736 	return true;
41737 
41738       default:
41739 	break;
41740     }
41741 
41742   return false;
41743 }
41744 
41745 /* If we are copying between general and FP registers, we need a memory
41746    location. The same is true for SSE and MMX registers.
41747 
41748    To optimize register_move_cost performance, allow inline variant.
41749 
41750    The macro can't work reliably when one of the CLASSES is a class containing
41751    registers from multiple units (SSE, MMX, integer).  We avoid this by never
41752    combining those units in a single alternative in the machine description.
41753    Ensure that this constraint holds to avoid unexpected surprises.
41754 
41755    When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41756    enforce these sanity checks.  */
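/* For example, a DFmode copy between FLOAT_REGS and GENERAL_REGS has no
   direct instruction and must bounce through a stack slot: one store
   followed by one load.  */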
41757 
41758 static inline bool
41759 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41760 				machine_mode mode, int strict)
41761 {
41762   if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
41763     return false;
41764   if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41765       || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41766       || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41767       || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41768       || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41769       || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41770     {
41771       gcc_assert (!strict || lra_in_progress);
41772       return true;
41773     }
41774 
41775   if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41776     return true;
41777 
41778   /* Between mask and general, we have moves no larger than word size.  */
41779   if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41780       && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41781     return true;
41782 
41783   /* ??? This is a lie.  We do have moves between mmx/general and
41784      between mmx/sse2.  But by saying we need secondary memory we discourage the
41785      register allocator from using the mmx registers unless needed.  */
41786   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41787     return true;
41788 
41789   if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41790     {
41791       /* SSE1 doesn't have any direct moves from other classes.  */
41792       if (!TARGET_SSE2)
41793 	return true;
41794 
41795       /* If the target says that inter-unit moves are more expensive
41796 	 than moving through memory, then don't generate them.  */
41797       if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41798 	  || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41799 	return true;
41800 
41801       /* Between SSE and general, we have moves no larger than word size.  */
41802       if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41803 	return true;
41804     }
41805 
41806   return false;
41807 }
41808 
41809 bool
41810 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41811 			      machine_mode mode, int strict)
41812 {
41813   return inline_secondary_memory_needed (class1, class2, mode, strict);
41814 }
41815 
41816 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41817 
41818    On the 80386, this is the size of MODE in words,
41819    except in the FP regs, where a single reg is always enough.  */
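/* For example, on IA-32 an XFmode value occupies three word-sized
   integer registers (12 bytes) but only a single x87 stack register,
   hence the special cases below.  */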
41820 
41821 static unsigned char
41822 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41823 {
41824   if (MAYBE_INTEGER_CLASS_P (rclass))
41825     {
41826       if (mode == XFmode)
41827 	return (TARGET_64BIT ? 2 : 3);
41828       else if (mode == XCmode)
41829 	return (TARGET_64BIT ? 4 : 6);
41830       else
41831 	return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41832     }
41833   else
41834     {
41835       if (COMPLEX_MODE_P (mode))
41836 	return 2;
41837       else
41838 	return 1;
41839     }
41840 }
41841 
41842 /* Return true if the registers in CLASS cannot represent the change from
41843    modes FROM to TO.  */
41844 
41845 bool
41846 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41847 			       enum reg_class regclass)
41848 {
41849   if (from == to)
41850     return false;
41851 
41852   /* x87 registers can't do subreg at all, as all values are reformatted
41853      to extended precision.  */
41854   if (MAYBE_FLOAT_CLASS_P (regclass))
41855     return true;
41856 
41857   if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41858     {
41859       int from_size = GET_MODE_SIZE (from);
41860       int to_size = GET_MODE_SIZE (to);
41861 
41862       /* Vector registers do not support QI or HImode loads.  If we don't
41863 	 disallow a change to these modes, reload will assume it's ok to
41864 	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
41865 	 the vec_dupv4hi pattern.  */
41866       if (from_size < 4)
41867 	return true;
41868 
41869       /* Further, we cannot allow word_mode subregs of full vector modes.
41870          Otherwise the middle-end will assume it's ok to store to
41871          (subreg:DI (reg:TI 100) 0) in order to modify only the low 64 bits
41872          of the 128-bit register.  However, after reload the subreg will
41873          be dropped leaving a plain DImode store.  This is indistinguishable
41874          from a "normal" DImode move, and so we're justified to use movsd,
41875          which modifies the entire 128-bit register.  */
41876       if (to_size == UNITS_PER_WORD && from_size > UNITS_PER_WORD)
41877 	return true;
41878     }
41879 
41880   return false;
41881 }
41882 
41883 /* Return the cost of moving data of mode M between a
41884    register and memory.  A value of 2 is the default; this cost is
41885    relative to those in `REGISTER_MOVE_COST'.
41886 
41887    This function is used extensively by register_move_cost that is used to
41888    build tables at startup.  Make it inline in this case.
41889    When IN is 2, return the maximum of the in and out move costs.
41890 
41891    If moving between registers and memory is more expensive than
41892    between two registers, you should define this macro to express the
41893    relative cost.
41894 
41895    Also model the increased cost of moving QImode registers in
41896    non-Q_REGS classes.
41897  */
41898 static inline int
41899 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41900 			 int in)
41901 {
41902   int cost;
41903   if (FLOAT_CLASS_P (regclass))
41904     {
41905       int index;
41906       switch (mode)
41907 	{
41908 	  case SFmode:
41909 	    index = 0;
41910 	    break;
41911 	  case DFmode:
41912 	    index = 1;
41913 	    break;
41914 	  case XFmode:
41915 	    index = 2;
41916 	    break;
41917 	  default:
41918 	    return 100;
41919 	}
41920       if (in == 2)
41921         return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41922       return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41923     }
41924   if (SSE_CLASS_P (regclass))
41925     {
41926       int index;
41927       switch (GET_MODE_SIZE (mode))
41928 	{
41929 	  case 4:
41930 	    index = 0;
41931 	    break;
41932 	  case 8:
41933 	    index = 1;
41934 	    break;
41935 	  case 16:
41936 	    index = 2;
41937 	    break;
41938 	  default:
41939 	    return 100;
41940 	}
41941       if (in == 2)
41942         return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41943       return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41944     }
41945   if (MMX_CLASS_P (regclass))
41946     {
41947       int index;
41948       switch (GET_MODE_SIZE (mode))
41949 	{
41950 	  case 4:
41951 	    index = 0;
41952 	    break;
41953 	  case 8:
41954 	    index = 1;
41955 	    break;
41956 	  default:
41957 	    return 100;
41958 	}
41959       if (in == 2)
41960         return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41961       return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
41962     }
41963   switch (GET_MODE_SIZE (mode))
41964     {
41965       case 1:
41966 	if (Q_CLASS_P (regclass) || TARGET_64BIT)
41967 	  {
41968 	    if (!in)
41969 	      return ix86_cost->int_store[0];
41970 	    if (TARGET_PARTIAL_REG_DEPENDENCY
41971 	        && optimize_function_for_speed_p (cfun))
41972 	      cost = ix86_cost->movzbl_load;
41973 	    else
41974 	      cost = ix86_cost->int_load[0];
41975 	    if (in == 2)
41976 	      return MAX (cost, ix86_cost->int_store[0]);
41977 	    return cost;
41978 	  }
41979 	else
41980 	  {
41981 	   if (in == 2)
41982 	     return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41983 	   if (in)
41984 	     return ix86_cost->movzbl_load;
41985 	   else
41986 	     return ix86_cost->int_store[0] + 4;
41987 	  }
41988 	break;
41989       case 2:
41990 	if (in == 2)
41991 	  return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41992 	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41993       default:
41994 	/* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
41995 	if (mode == TFmode)
41996 	  mode = XFmode;
41997 	if (in == 2)
41998 	  cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41999 	else if (in)
42000 	  cost = ix86_cost->int_load[2];
42001 	else
42002 	  cost = ix86_cost->int_store[2];
42003 	return (cost * (((int) GET_MODE_SIZE (mode)
42004 		        + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
42005     }
42006 }
42007 
42008 static int
42009 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
42010 		       bool in)
42011 {
42012   return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
42013 }
42014 
42015 
42016 /* Return the cost of moving data from a register in class CLASS1 to
42017    one in class CLASS2.
42018 
42019    It is not required that the cost always equal 2 when FROM is the same as TO;
42020    on some machines it is expensive to move between registers if they are not
42021    general registers.  */
42022 
42023 static int
42024 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
42025 			 reg_class_t class2_i)
42026 {
42027   enum reg_class class1 = (enum reg_class) class1_i;
42028   enum reg_class class2 = (enum reg_class) class2_i;
42029 
42030   /* In case we require secondary memory, compute cost of the store followed
42031      by load.  In order to avoid bad register allocation choices, we need
42032      for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
42033 
42034   if (inline_secondary_memory_needed (class1, class2, mode, 0))
42035     {
42036       int cost = 1;
42037 
42038       cost += inline_memory_move_cost (mode, class1, 2);
42039       cost += inline_memory_move_cost (mode, class2, 2);
42040 
42041       /* In the case of copying from a general purpose register we may emit
42042          multiple stores followed by a single load, causing a memory size
42043          mismatch stall.  Count this as an arbitrarily high cost of 20.  */
42044       if (targetm.class_max_nregs (class1, mode)
42045 	  > targetm.class_max_nregs (class2, mode))
42046 	cost += 20;
42047 
42048       /* In the case of FP/MMX moves, the registers actually overlap, and we
42049 	 have to switch modes in order to treat them differently.  */
42050       if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
42051           || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
42052 	cost += 20;
42053 
42054       return cost;
42055     }
42056 
42057   /* Moves between SSE/MMX and integer unit are expensive.  */
42058   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
42059       || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
42060 
42061     /* ??? By keeping the returned value relatively high, we limit the number
42062        of moves between integer and MMX/SSE registers for all targets.
42063        Additionally, a high value prevents a problem with x86_modes_tieable_p(),
42064        where integer modes in MMX/SSE registers are not tieable
42065        because of missing QImode and HImode moves to, from or between
42066        MMX/SSE registers.  */
42067     return MAX (8, ix86_cost->mmxsse_to_integer);
42068 
42069   if (MAYBE_FLOAT_CLASS_P (class1))
42070     return ix86_cost->fp_move;
42071   if (MAYBE_SSE_CLASS_P (class1))
42072     return ix86_cost->sse_move;
42073   if (MAYBE_MMX_CLASS_P (class1))
42074     return ix86_cost->mmx_move;
42075   return 2;
42076 }
42077 
42078 /* Return TRUE if hard register REGNO can hold a value of machine-mode
42079    MODE.  */
42080 
42081 bool
42082 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
42083 {
42084   /* The flags register, and only the flags register, can hold CCmode values.  */
42085   if (CC_REGNO_P (regno))
42086     return GET_MODE_CLASS (mode) == MODE_CC;
42087   if (GET_MODE_CLASS (mode) == MODE_CC
42088       || GET_MODE_CLASS (mode) == MODE_RANDOM
42089       || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
42090     return false;
42091   if (STACK_REGNO_P (regno))
42092     return VALID_FP_MODE_P (mode);
42093   if (MASK_REGNO_P (regno))
42094     return (VALID_MASK_REG_MODE (mode)
42095 	    || (TARGET_AVX512BW
42096 		&& VALID_MASK_AVX512BW_MODE (mode)));
42097   if (BND_REGNO_P (regno))
42098     return VALID_BND_REG_MODE (mode);
42099   if (SSE_REGNO_P (regno))
42100     {
42101       /* We implement the move patterns for all vector modes into and
42102 	 out of SSE registers, even when no operation instructions
42103 	 are available.  */
42104 
42105       /* For AVX-512 we allow, regardless of regno:
42106 	  - XI mode
42107 	  - any of 512-bit wide vector mode
42108 	  - any scalar mode.  */
42109       if (TARGET_AVX512F
42110 	  && (mode == XImode
42111 	      || VALID_AVX512F_REG_MODE (mode)
42112 	      || VALID_AVX512F_SCALAR_MODE (mode)))
42113 	return true;
42114 
42115       /* TODO check for QI/HI scalars.  */
42116       /* AVX512VL allows SSE regs 16+ for 128/256-bit modes.  */
42117       if (TARGET_AVX512VL
42118 	  && (mode == OImode
42119 	      || mode == TImode
42120 	      || VALID_AVX256_REG_MODE (mode)
42121 	      || VALID_AVX512VL_128_REG_MODE (mode)))
42122 	return true;
42123 
42124       /* xmm16-xmm31 are only available for AVX-512.  */
42125       if (EXT_REX_SSE_REGNO_P (regno))
42126 	return false;
42127 
42128       /* OImode and AVX modes are available only when AVX is enabled.  */
42129       return ((TARGET_AVX
42130 	       && VALID_AVX256_REG_OR_OI_MODE (mode))
42131 	      || VALID_SSE_REG_MODE (mode)
42132 	      || VALID_SSE2_REG_MODE (mode)
42133 	      || VALID_MMX_REG_MODE (mode)
42134 	      || VALID_MMX_REG_MODE_3DNOW (mode));
42135     }
42136   if (MMX_REGNO_P (regno))
42137     {
42138       /* We implement the move patterns for 3DNOW modes even in MMX mode,
42139 	 so if the register is available at all, then we can move data of
42140 	 the given mode into or out of it.  */
42141       return (VALID_MMX_REG_MODE (mode)
42142 	      || VALID_MMX_REG_MODE_3DNOW (mode));
42143     }
42144 
42145   if (mode == QImode)
42146     {
42147       /* Take care with QImode values - they can live in non-QI regs,
42148 	 but then they do cause partial register stalls.  */
42149       if (ANY_QI_REGNO_P (regno))
42150 	return true;
42151       if (!TARGET_PARTIAL_REG_STALL)
42152 	return true;
42153       /* LRA checks if the hard register is OK for the given mode.
42154 	 QImode values can live in non-QI regs, so we allow all
42155 	 registers here.  */
42156       if (lra_in_progress)
42157 	return true;
42158       return !can_create_pseudo_p ();
42159     }
42160   /* We handle both integer and floats in the general purpose registers.  */
42161   else if (VALID_INT_MODE_P (mode))
42162     return true;
42163   else if (VALID_FP_MODE_P (mode))
42164     return true;
42165   else if (VALID_DFP_MODE_P (mode))
42166     return true;
42167   /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
42168      on to use that value in smaller contexts, this can easily force a
42169      pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
42170      supporting DImode, allow it.  */
42171   else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
42172     return true;
42173 
42174   return false;
42175 }
42176 
42177 /* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
42178    tieable integer mode.  */
42179 
42180 static bool
42181 ix86_tieable_integer_mode_p (machine_mode mode)
42182 {
42183   switch (mode)
42184     {
42185     case HImode:
42186     case SImode:
42187       return true;
42188 
42189     case QImode:
42190       return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
42191 
42192     case DImode:
42193       return TARGET_64BIT;
42194 
42195     default:
42196       return false;
42197     }
42198 }
42199 
42200 /* Return true if MODE1 is accessible in a register that can hold MODE2
42201    without copying.  That is, all register classes that can hold MODE2
42202    can also hold MODE1.  */
42203 
42204 bool
42205 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
42206 {
42207   if (mode1 == mode2)
42208     return true;
42209 
42210   if (ix86_tieable_integer_mode_p (mode1)
42211       && ix86_tieable_integer_mode_p (mode2))
42212     return true;
42213 
42214   /* MODE2 being XFmode implies fp stack or general regs, which means we
42215      can tie any smaller floating point modes to it.  Note that we do not
42216      tie this with TFmode.  */
42217   if (mode2 == XFmode)
42218     return mode1 == SFmode || mode1 == DFmode;
42219 
42220   /* MODE2 being DFmode implies fp stack, general or sse regs, which means
42221      that we can tie it with SFmode.  */
42222   if (mode2 == DFmode)
42223     return mode1 == SFmode;
42224 
42225   /* If MODE2 is only appropriate for an SSE register, then tie with
42226      any other mode acceptable to SSE registers.  */
42227   if (GET_MODE_SIZE (mode2) == 32
42228       && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
42229     return (GET_MODE_SIZE (mode1) == 32
42230 	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
42231   if (GET_MODE_SIZE (mode2) == 16
42232       && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
42233     return (GET_MODE_SIZE (mode1) == 16
42234 	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
42235 
42236   /* If MODE2 is appropriate for an MMX register, then tie
42237      with any other mode acceptable to MMX registers.  */
42238   if (GET_MODE_SIZE (mode2) == 8
42239       && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
42240     return (GET_MODE_SIZE (mode1) == 8
42241 	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
42242 
42243   return false;
42244 }
42245 
42246 /* Return the cost of moving between two registers of mode MODE.  */
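/* For example, a TImode register move on x86-64 falls into the default
   (integer) case below: units stays UNITS_PER_WORD (8), so the move is
   costed as two word-sized pieces, COSTS_N_INSNS (2).  */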
42247 
42248 static int
42249 ix86_set_reg_reg_cost (machine_mode mode)
42250 {
42251   unsigned int units = UNITS_PER_WORD;
42252 
42253   switch (GET_MODE_CLASS (mode))
42254     {
42255     default:
42256       break;
42257 
42258     case MODE_CC:
42259       units = GET_MODE_SIZE (CCmode);
42260       break;
42261 
42262     case MODE_FLOAT:
42263       if ((TARGET_SSE && mode == TFmode)
42264 	  || (TARGET_80387 && mode == XFmode)
42265 	  || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
42266 	  || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
42267 	units = GET_MODE_SIZE (mode);
42268       break;
42269 
42270     case MODE_COMPLEX_FLOAT:
42271       if ((TARGET_SSE && mode == TCmode)
42272 	  || (TARGET_80387 && mode == XCmode)
42273 	  || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
42274 	  || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
42275 	units = GET_MODE_SIZE (mode);
42276       break;
42277 
42278     case MODE_VECTOR_INT:
42279     case MODE_VECTOR_FLOAT:
42280       if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
42281 	  || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
42282 	  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
42283 	  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
42284 	  || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
42285 	units = GET_MODE_SIZE (mode);
42286     }
42287 
42288   /* Return the cost of moving between two registers of mode MODE,
42289      assuming that the move will be in pieces of at most UNITS bytes.  */
42290   return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
42291 }
42292 
42293 /* Compute a (partial) cost for rtx X.  Return true if the complete
42294    cost has been computed, and false if subexpressions should be
42295    scanned.  In either case, *TOTAL contains the cost result.  */
42296 
42297 static bool
42298 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
42299 		bool speed)
42300 {
42301   rtx mask;
42302   enum rtx_code code = (enum rtx_code) code_i;
42303   enum rtx_code outer_code = (enum rtx_code) outer_code_i;
42304   machine_mode mode = GET_MODE (x);
42305   const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
42306 
42307   switch (code)
42308     {
42309     case SET:
42310       if (register_operand (SET_DEST (x), VOIDmode)
42311 	  && reg_or_0_operand (SET_SRC (x), VOIDmode))
42312 	{
42313 	  *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
42314 	  return true;
42315 	}
42316       return false;
42317 
42318     case CONST_INT:
42319     case CONST:
42320     case LABEL_REF:
42321     case SYMBOL_REF:
42322       if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
42323 	*total = 3;
42324       else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
42325 	*total = 2;
42326       else if (flag_pic && SYMBOLIC_CONST (x)
42327 	       && !(TARGET_64BIT
42328 		    && (GET_CODE (x) == LABEL_REF
42329 			|| (GET_CODE (x) == SYMBOL_REF
42330 			    && SYMBOL_REF_LOCAL_P (x)))))
42331 	*total = 1;
42332       else
42333 	*total = 0;
42334       return true;
42335 
42336     case CONST_DOUBLE:
42337       if (mode == VOIDmode)
42338 	{
42339 	  *total = 0;
42340 	  return true;
42341 	}
42342       switch (standard_80387_constant_p (x))
42343 	{
42344 	case 1: /* 0.0 */
42345 	  *total = 1;
42346 	  return true;
42347 	default: /* Other constants */
42348 	  *total = 2;
42349 	  return true;
42350 	case 0:
42351 	case -1:
42352 	  break;
42353 	}
42354       if (SSE_FLOAT_MODE_P (mode))
42355 	{
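	  /* Note: the CONST_VECTOR case label below jumps straight into
	     this block, bypassing the scalar 80387 constant checks above.  */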
42356     case CONST_VECTOR:
42357 	  switch (standard_sse_constant_p (x))
42358 	    {
42359 	    case 0:
42360 	      break;
42361 	    case 1:  /* 0: xor eliminates false dependency */
42362 	      *total = 0;
42363 	      return true;
42364 	    default: /* -1: cmp contains false dependency */
42365 	      *total = 1;
42366 	      return true;
42367 	    }
42368 	}
42369       /* Fall back to (MEM (SYMBOL_REF)), since that's where
42370 	 it'll probably end up.  Add a penalty for size.  */
42371       *total = (COSTS_N_INSNS (1)
42372 		+ (flag_pic != 0 && !TARGET_64BIT)
42373 		+ (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
42374       return true;
42375 
42376     case ZERO_EXTEND:
42377       /* The zero extension is often completely free on x86_64, so make
42378 	 it as cheap as possible.  */
42379       if (TARGET_64BIT && mode == DImode
42380 	  && GET_MODE (XEXP (x, 0)) == SImode)
42381 	*total = 1;
42382       else if (TARGET_ZERO_EXTEND_WITH_AND)
42383 	*total = cost->add;
42384       else
42385 	*total = cost->movzx;
42386       return false;
42387 
42388     case SIGN_EXTEND:
42389       *total = cost->movsx;
42390       return false;
42391 
42392     case ASHIFT:
42393       if (SCALAR_INT_MODE_P (mode)
42394 	  && GET_MODE_SIZE (mode) < UNITS_PER_WORD
42395 	  && CONST_INT_P (XEXP (x, 1)))
42396 	{
42397 	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42398 	  if (value == 1)
42399 	    {
42400 	      *total = cost->add;
42401 	      return false;
42402 	    }
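	  /* A left shift by 2 or 3 can also be done with lea through scaled
	     addressing, e.g. x << 2 as "lea 0(,%eax,4), %edx"; prefer lea
	     when it is no more expensive than the shift.  */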
42403 	  if ((value == 2 || value == 3)
42404 	      && cost->lea <= cost->shift_const)
42405 	    {
42406 	      *total = cost->lea;
42407 	      return false;
42408 	    }
42409 	}
42410       /* FALLTHRU */
42411 
42412     case ROTATE:
42413     case ASHIFTRT:
42414     case LSHIFTRT:
42415     case ROTATERT:
42416       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42417 	{
42418 	  /* ??? Should be SSE vector operation cost.  */
42419 	  /* At least for published AMD latencies, this really is the same
42420 	     as the latency for a simple fpu operation like fabs.  */
42421 	  /* V*QImode is emulated with 1-11 insns.  */
42422 	  if (mode == V16QImode || mode == V32QImode)
42423 	    {
42424 	      int count = 11;
42425 	      if (TARGET_XOP && mode == V16QImode)
42426 		{
42427 		  /* For XOP we use vpshab, which requires a broadcast of the
42428 		     value to the variable shift insn.  For constants this
42429 		     means a V16QI const in mem; even when we can perform the
42430 		     shift with one insn set the cost to prefer paddb.  */
42431 		  if (CONSTANT_P (XEXP (x, 1)))
42432 		    {
42433 		      *total = (cost->fabs
42434 				+ rtx_cost (XEXP (x, 0), code, 0, speed)
42435 				+ (speed ? 2 : COSTS_N_BYTES (16)));
42436 		      return true;
42437 		    }
42438 		  count = 3;
42439 		}
42440 	      else if (TARGET_SSSE3)
42441 		count = 7;
42442 	      *total = cost->fabs * count;
42443 	    }
42444 	  else
42445 	    *total = cost->fabs;
42446 	}
42447       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42448 	{
42449 	  if (CONST_INT_P (XEXP (x, 1)))
42450 	    {
42451 	      if (INTVAL (XEXP (x, 1)) > 32)
42452 		*total = cost->shift_const + COSTS_N_INSNS (2);
42453 	      else
42454 		*total = cost->shift_const * 2;
42455 	    }
42456 	  else
42457 	    {
42458 	      if (GET_CODE (XEXP (x, 1)) == AND)
42459 		*total = cost->shift_var * 2;
42460 	      else
42461 		*total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42462 	    }
42463 	}
42464       else
42465 	{
42466 	  if (CONST_INT_P (XEXP (x, 1)))
42467 	    *total = cost->shift_const;
42468 	  else if (GET_CODE (XEXP (x, 1)) == SUBREG
42469 		   && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42470 	    {
42471 	      /* Return the cost after shift-and truncation.  */
42472 	      *total = cost->shift_var;
42473 	      return true;
42474 	    }
42475 	  else
42476 	    *total = cost->shift_var;
42477 	}
42478       return false;
42479 
42480     case FMA:
42481       {
42482 	rtx sub;
42483 
42484         gcc_assert (FLOAT_MODE_P (mode));
42485         gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42486 
42487         /* ??? SSE scalar/vector cost should be used here.  */
42488         /* ??? Bald assumption that fma has the same cost as fmul.  */
42489         *total = cost->fmul;
42490 	*total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
42491 
42492         /* A negation in op0 or op2 is free: FMS, FNMA, FNMS.  */
42493 	sub = XEXP (x, 0);
42494 	if (GET_CODE (sub) == NEG)
42495 	  sub = XEXP (sub, 0);
42496 	*total += rtx_cost (sub, FMA, 0, speed);
42497 
42498 	sub = XEXP (x, 2);
42499 	if (GET_CODE (sub) == NEG)
42500 	  sub = XEXP (sub, 0);
42501 	*total += rtx_cost (sub, FMA, 2, speed);
42502 	return true;
42503       }
42504 
42505     case MULT:
42506       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42507 	{
42508 	  /* ??? SSE scalar cost should be used here.  */
42509 	  *total = cost->fmul;
42510 	  return false;
42511 	}
42512       else if (X87_FLOAT_MODE_P (mode))
42513 	{
42514 	  *total = cost->fmul;
42515 	  return false;
42516 	}
42517       else if (FLOAT_MODE_P (mode))
42518 	{
42519 	  /* ??? SSE vector cost should be used here.  */
42520 	  *total = cost->fmul;
42521 	  return false;
42522 	}
42523       else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42524 	{
42525 	  /* V*QImode is emulated with 7-13 insns.  */
42526 	  if (mode == V16QImode || mode == V32QImode)
42527 	    {
42528 	      int extra = 11;
42529 	      if (TARGET_XOP && mode == V16QImode)
42530 		extra = 5;
42531 	      else if (TARGET_SSSE3)
42532 		extra = 6;
42533 	      *total = cost->fmul * 2 + cost->fabs * extra;
42534 	    }
42535 	  /* V*DImode is emulated with 5-8 insns.  */
42536 	  else if (mode == V2DImode || mode == V4DImode)
42537 	    {
42538 	      if (TARGET_XOP && mode == V2DImode)
42539 		*total = cost->fmul * 2 + cost->fabs * 3;
42540 	      else
42541 		*total = cost->fmul * 3 + cost->fabs * 5;
42542 	    }
42543 	  /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42544 	     insns, including two PMULUDQ.  */
42545 	  else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42546 	    *total = cost->fmul * 2 + cost->fabs * 5;
42547 	  else
42548 	    *total = cost->fmul;
42549 	  return false;
42550 	}
42551       else
42552 	{
42553 	  rtx op0 = XEXP (x, 0);
42554 	  rtx op1 = XEXP (x, 1);
42555 	  int nbits;
42556 	  if (CONST_INT_P (XEXP (x, 1)))
42557 	    {
42558 	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
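	      /* Count the set bits of the multiplier (Kernighan's method):
		 each iteration of value &= value - 1 clears the lowest set
		 bit, so nbits ends up as the population count.  */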
42559 	      for (nbits = 0; value != 0; value &= value - 1)
42560 	        nbits++;
42561 	    }
42562 	  else
42563 	    /* This is arbitrary.  */
42564 	    nbits = 7;
42565 
42566 	  /* Compute costs correctly for widening multiplication.  */
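	  /* E.g. (mult:DI (sign_extend:DI (reg:SI)) (sign_extend:DI (reg:SI)))
	     is a single widening imul, so price the operands in the inner
	     mode and narrow MODE accordingly below.  */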
42567 	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42568 	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42569 	         == GET_MODE_SIZE (mode))
42570 	    {
42571 	      int is_mulwiden = 0;
42572 	      machine_mode inner_mode = GET_MODE (op0);
42573 
42574 	      if (GET_CODE (op0) == GET_CODE (op1))
42575 		is_mulwiden = 1, op1 = XEXP (op1, 0);
42576 	      else if (CONST_INT_P (op1))
42577 		{
42578 		  if (GET_CODE (op0) == SIGN_EXTEND)
42579 		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42580 			          == INTVAL (op1);
42581 		  else
42582 		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42583 	        }
42584 
42585 	      if (is_mulwiden)
42586 	        op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42587 	    }
42588 
42589   	  *total = (cost->mult_init[MODE_INDEX (mode)]
42590 		    + nbits * cost->mult_bit
42591 	            + rtx_cost (op0, outer_code, opno, speed)
42592 		    + rtx_cost (op1, outer_code, opno, speed));
42593 
42594           return true;
42595 	}
42596 
42597     case DIV:
42598     case UDIV:
42599     case MOD:
42600     case UMOD:
42601       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42602 	/* ??? SSE cost should be used here.  */
42603 	*total = cost->fdiv;
42604       else if (X87_FLOAT_MODE_P (mode))
42605 	*total = cost->fdiv;
42606       else if (FLOAT_MODE_P (mode))
42607 	/* ??? SSE vector cost should be used here.  */
42608 	*total = cost->fdiv;
42609       else
42610 	*total = cost->divide[MODE_INDEX (mode)];
42611       return false;
42612 
42613     case PLUS:
42614       if (GET_MODE_CLASS (mode) == MODE_INT
42615 	  && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42616 	{
42617 	  if (GET_CODE (XEXP (x, 0)) == PLUS
42618 	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42619 	      && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42620 	      && CONSTANT_P (XEXP (x, 1)))
42621 	    {
42622 	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42623 	      if (val == 2 || val == 4 || val == 8)
42624 		{
42625 		  *total = cost->lea;
42626 		  *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42627 				      outer_code, opno, speed);
42628 		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42629 				      outer_code, opno, speed);
42630 		  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42631 		  return true;
42632 		}
42633 	    }
42634 	  else if (GET_CODE (XEXP (x, 0)) == MULT
42635 		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42636 	    {
42637 	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42638 	      if (val == 2 || val == 4 || val == 8)
42639 		{
42640 		  *total = cost->lea;
42641 		  *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42642 				      outer_code, opno, speed);
42643 		  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42644 		  return true;
42645 		}
42646 	    }
42647 	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
42648 	    {
42649 	      *total = cost->lea;
42650 	      *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42651 				  outer_code, opno, speed);
42652 	      *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42653 				  outer_code, opno, speed);
42654 	      *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42655 	      return true;
42656 	    }
42657 	}
42658       /* FALLTHRU */
42659 
42660     case MINUS:
42661       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42662 	{
42663 	  /* ??? SSE cost should be used here.  */
42664 	  *total = cost->fadd;
42665 	  return false;
42666 	}
42667       else if (X87_FLOAT_MODE_P (mode))
42668 	{
42669 	  *total = cost->fadd;
42670 	  return false;
42671 	}
42672       else if (FLOAT_MODE_P (mode))
42673 	{
42674 	  /* ??? SSE vector cost should be used here.  */
42675 	  *total = cost->fadd;
42676 	  return false;
42677 	}
42678       /* FALLTHRU */
42679 
42680     case AND:
42681     case IOR:
42682     case XOR:
42683       if (GET_MODE_CLASS (mode) == MODE_INT
42684 	  && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42685 	{
42686 	  *total = (cost->add * 2
42687 		    + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42688 		       << (GET_MODE (XEXP (x, 0)) != DImode))
42689 		    + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42690 	               << (GET_MODE (XEXP (x, 1)) != DImode)));
42691 	  return true;
42692 	}
42693       /* FALLTHRU */
42694 
42695     case NEG:
42696       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42697 	{
42698 	  /* ??? SSE cost should be used here.  */
42699 	  *total = cost->fchs;
42700 	  return false;
42701 	}
42702       else if (X87_FLOAT_MODE_P (mode))
42703 	{
42704 	  *total = cost->fchs;
42705 	  return false;
42706 	}
42707       else if (FLOAT_MODE_P (mode))
42708 	{
42709 	  /* ??? SSE vector cost should be used here.  */
42710 	  *total = cost->fchs;
42711 	  return false;
42712 	}
42713       /* FALLTHRU */
42714 
42715     case NOT:
42716       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42717 	{
42718 	  /* ??? Should be SSE vector operation cost.  */
42719 	  /* At least for published AMD latencies, this really is the same
42720 	     as the latency for a simple fpu operation like fabs.  */
42721 	  *total = cost->fabs;
42722 	}
42723       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42724 	*total = cost->add * 2;
42725       else
42726 	*total = cost->add;
42727       return false;
42728 
42729     case COMPARE:
42730       if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42731 	  && XEXP (XEXP (x, 0), 1) == const1_rtx
42732 	  && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42733 	  && XEXP (x, 1) == const0_rtx)
42734 	{
42735 	  /* This kind of construct is implemented using test[bwl].
42736 	     Treat it as if we had an AND.  */
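	  /* E.g. (compare (zero_extract X 1 3) (const_int 0)) is a test of
	     bit 3, roughly `testl $8, X' in AT&T syntax.  */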
42737 	  *total = (cost->add
42738 		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42739 		    + rtx_cost (const1_rtx, outer_code, opno, speed));
42740 	  return true;
42741 	}
42742       return false;
42743 
42744     case FLOAT_EXTEND:
42745       if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42746 	*total = 0;
42747       return false;
42748 
42749     case ABS:
42750       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42751 	/* ??? SSE cost should be used here.  */
42752 	*total = cost->fabs;
42753       else if (X87_FLOAT_MODE_P (mode))
42754 	*total = cost->fabs;
42755       else if (FLOAT_MODE_P (mode))
42756 	/* ??? SSE vector cost should be used here.  */
42757 	*total = cost->fabs;
42758       return false;
42759 
42760     case SQRT:
42761       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42762 	/* ??? SSE cost should be used here.  */
42763 	*total = cost->fsqrt;
42764       else if (X87_FLOAT_MODE_P (mode))
42765 	*total = cost->fsqrt;
42766       else if (FLOAT_MODE_P (mode))
42767 	/* ??? SSE vector cost should be used here.  */
42768 	*total = cost->fsqrt;
42769       return false;
42770 
42771     case UNSPEC:
42772       if (XINT (x, 1) == UNSPEC_TP)
42773 	*total = 0;
42774       return false;
42775 
42776     case VEC_SELECT:
42777     case VEC_CONCAT:
42778     case VEC_DUPLICATE:
42779       /* ??? Assume all of these vector manipulation patterns are
42780 	 recognizable, in which case they all have pretty much the
42781 	 same cost.  */
42782       *total = cost->fabs;
42783       return true;
42784     case VEC_MERGE:
42785       mask = XEXP (x, 2);
42786       /* This is a masked instruction; assume the same cost
42787 	 as the non-masked variant.  */
42788       if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42789 	*total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42790       else
42791 	*total = cost->fabs;
42792       return true;
42793 
42794     default:
42795       return false;
42796     }
42797 }
42798 
42799 #if TARGET_MACHO
42800 
42801 static int current_machopic_label_num;
42802 
42803 /* Given a symbol name and its associated stub, write out the
42804    definition of the stub.  */
42805 
42806 void
42807 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42808 {
42809   unsigned int length;
42810   char *binder_name, *symbol_name, lazy_ptr_name[32];
42811   int label = ++current_machopic_label_num;
42812 
42813   /* For 64-bit we shouldn't get here.  */
42814   gcc_assert (!TARGET_64BIT);
42815 
42816   /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
42817   symb = targetm.strip_name_encoding (symb);
42818 
42819   length = strlen (stub);
42820   binder_name = XALLOCAVEC (char, length + 32);
42821   GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42822 
42823   length = strlen (symb);
42824   symbol_name = XALLOCAVEC (char, length + 32);
42825   GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42826 
42827   sprintf (lazy_ptr_name, "L%d$lz", label);
42828 
42829   if (MACHOPIC_ATT_STUB)
42830     switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42831   else if (MACHOPIC_PURE)
42832     switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42833   else
42834     switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42835 
42836   fprintf (file, "%s:\n", stub);
42837   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42838 
42839   if (MACHOPIC_ATT_STUB)
42840     {
42841       fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42842     }
42843   else if (MACHOPIC_PURE)
42844     {
42845       /* PIC stub.  */
42846       /* 25-byte PIC stub using "CALL get_pc_thunk".  */
42847       rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42848       output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
42849       fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42850 	       label, lazy_ptr_name, label);
42851       fprintf (file, "\tjmp\t*%%ecx\n");
42852     }
42853   else
42854     fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42855 
42856   /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42857      it needs no stub-binding-helper.  */
42858   if (MACHOPIC_ATT_STUB)
42859     return;
42860 
42861   fprintf (file, "%s:\n", binder_name);
42862 
42863   if (MACHOPIC_PURE)
42864     {
42865       fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42866       fprintf (file, "\tpushl\t%%ecx\n");
42867     }
42868   else
42869     fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42870 
42871   fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42872 
42873   /* N.B. Keep the correspondence of these
42874      'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42875      old-pic/new-pic/non-pic stubs; altering this will break
42876      compatibility with existing dylibs.  */
42877   if (MACHOPIC_PURE)
42878     {
42879       /* 25-byte PIC stub using "CALL get_pc_thunk".  */
42880       switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42881     }
42882   else
42883     /* 16-byte -mdynamic-no-pic stub.  */
42884     switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
42885 
42886   fprintf (file, "%s:\n", lazy_ptr_name);
42887   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42888   fprintf (file, ASM_LONG "%s\n", binder_name);
42889 }
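
/* For illustration, the MACHOPIC_PURE path above emits a stub of roughly
   this shape for a symbol _foo (the label names here are schematic; the
   real binder and lazy-pointer names come from GEN_BINDER_NAME_FOR_STUB
   and the "L<N>$lz" pattern):

	_foo$stub:
		.indirect_symbol _foo
		call	___i686.get_pc_thunk.cx
	LPC$1:	movl	L1$lz-LPC$1(%ecx),%ecx
		jmp	*%ecx
	_foo$stub_binder:
		lea	L1$lz-_foo$stub_binder(%ecx),%ecx
		pushl	%ecx
		jmp	dyld_stub_binding_helper
	L1$lz:
		.indirect_symbol _foo
		.long	_foo$stub_binder  */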
42890 #endif /* TARGET_MACHO */
42891 
42892 /* Order the registers for register allocator.  */
42893 
42894 void
42895 x86_order_regs_for_local_alloc (void)
42896 {
42897    int pos = 0;
42898    int i;
42899 
42900    /* First allocate the local general purpose registers.  */
42901    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42902      if (GENERAL_REGNO_P (i) && call_used_regs[i])
42903 	reg_alloc_order [pos++] = i;
42904 
42905    /* Global general purpose registers.  */
42906    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42907      if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42908 	reg_alloc_order [pos++] = i;
42909 
42910    /* x87 registers come first in case we are doing FP math
42911       using them.  */
42912    if (!TARGET_SSE_MATH)
42913      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42914        reg_alloc_order [pos++] = i;
42915 
42916    /* SSE registers.  */
42917    for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42918      reg_alloc_order [pos++] = i;
42919    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42920      reg_alloc_order [pos++] = i;
42921 
42922    /* Extended REX SSE registers.  */
42923    for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42924      reg_alloc_order [pos++] = i;
42925 
42926    /* Mask registers.  */
42927    for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42928      reg_alloc_order [pos++] = i;
42929 
42930    /* MPX bound registers.  */
42931    for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42932      reg_alloc_order [pos++] = i;
42933 
42934    /* x87 registers.  */
42935    if (TARGET_SSE_MATH)
42936      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42937        reg_alloc_order [pos++] = i;
42938 
42939    for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42940      reg_alloc_order [pos++] = i;
42941 
42942    /* Initialize the rest of the array, as we do not allocate some
42943       registers at all.  */
42944    while (pos < FIRST_PSEUDO_REGISTER)
42945      reg_alloc_order [pos++] = 0;
42946 }
42947 
42948 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42949    in struct attribute_spec.handler.  */
42950 static tree
42951 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42952 					      tree args,
42953 					      int,
42954 					      bool *no_add_attrs)
42955 {
42956   if (TREE_CODE (*node) != FUNCTION_TYPE
42957       && TREE_CODE (*node) != METHOD_TYPE
42958       && TREE_CODE (*node) != FIELD_DECL
42959       && TREE_CODE (*node) != TYPE_DECL)
42960     {
42961       warning (OPT_Wattributes, "%qE attribute only applies to functions",
42962 	       name);
42963       *no_add_attrs = true;
42964       return NULL_TREE;
42965     }
42966   if (TARGET_64BIT)
42967     {
42968       warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42969 	       name);
42970       *no_add_attrs = true;
42971       return NULL_TREE;
42972     }
42973   if (is_attribute_p ("callee_pop_aggregate_return", name))
42974     {
42975       tree cst;
42976 
42977       cst = TREE_VALUE (args);
42978       if (TREE_CODE (cst) != INTEGER_CST)
42979 	{
42980 	  warning (OPT_Wattributes,
42981 		   "%qE attribute requires an integer constant argument",
42982 		   name);
42983 	  *no_add_attrs = true;
42984 	}
42985       else if (compare_tree_int (cst, 0) != 0
42986 	       && compare_tree_int (cst, 1) != 0)
42987 	{
42988 	  warning (OPT_Wattributes,
42989 		   "argument to %qE attribute is neither zero nor one",
42990 		   name);
42991 	  *no_add_attrs = true;
42992 	}
42993 
42994       return NULL_TREE;
42995     }
42996 
42997   return NULL_TREE;
42998 }
42999 
43000 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
43001    struct attribute_spec.handler.  */
43002 static tree
43003 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
43004 			   bool *no_add_attrs)
43005 {
43006   if (TREE_CODE (*node) != FUNCTION_TYPE
43007       && TREE_CODE (*node) != METHOD_TYPE
43008       && TREE_CODE (*node) != FIELD_DECL
43009       && TREE_CODE (*node) != TYPE_DECL)
43010     {
43011       warning (OPT_Wattributes, "%qE attribute only applies to functions",
43012 	       name);
43013       *no_add_attrs = true;
43014       return NULL_TREE;
43015     }
43016 
43017   /* The ms_abi and sysv_abi attributes are mutually exclusive.  */
43018   if (is_attribute_p ("ms_abi", name))
43019     {
43020       if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
43021         {
43022 	  error ("ms_abi and sysv_abi attributes are not compatible");
43023 	}
43024 
43025       return NULL_TREE;
43026     }
43027   else if (is_attribute_p ("sysv_abi", name))
43028     {
43029       if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
43030         {
43031 	  error ("ms_abi and sysv_abi attributes are not compatible");
43032 	}
43033 
43034       return NULL_TREE;
43035     }
43036 
43037   return NULL_TREE;
43038 }
43039 
43040 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
43041    struct attribute_spec.handler.  */
43042 static tree
43043 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
43044 			      bool *no_add_attrs)
43045 {
43046   tree *type = NULL;
43047   if (DECL_P (*node))
43048     {
43049       if (TREE_CODE (*node) == TYPE_DECL)
43050 	type = &TREE_TYPE (*node);
43051     }
43052   else
43053     type = node;
43054 
43055   if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
43056     {
43057       warning (OPT_Wattributes, "%qE attribute ignored",
43058 	       name);
43059       *no_add_attrs = true;
43060     }
43061 
43062   else if ((is_attribute_p ("ms_struct", name)
43063 	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
43064 	   || ((is_attribute_p ("gcc_struct", name)
43065 		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
43066     {
43067       warning (OPT_Wattributes, "%qE incompatible attribute ignored",
43068                name);
43069       *no_add_attrs = true;
43070     }
43071 
43072   return NULL_TREE;
43073 }
43074 
43075 static tree
43076 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
43077 			      bool *no_add_attrs)
43078 {
43079   if (TREE_CODE (*node) != FUNCTION_DECL)
43080     {
43081       warning (OPT_Wattributes, "%qE attribute only applies to functions",
43082                name);
43083       *no_add_attrs = true;
43084     }
43085   return NULL_TREE;
43086 }
43087 
43088 static bool
43089 ix86_ms_bitfield_layout_p (const_tree record_type)
43090 {
43091   return ((TARGET_MS_BITFIELD_LAYOUT
43092 	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
43093           || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
43094 }
43095 
43096 /* Return an expression indicating where the this parameter is
43097    located on entry to FUNCTION.  */
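
/* Roughly: with fastcall the this pointer arrives in %ecx, or %edx when a
   hidden aggregate-return pointer is present; with thiscall it arrives in
   %ecx; with regparm it arrives in %eax, or %edx past the hidden return
   pointer.  Without register parameters it sits at 4(%esp) on entry, or
   8(%esp) past a hidden return pointer.  */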
43098 
43099 static rtx
43100 x86_this_parameter (tree function)
43101 {
43102   tree type = TREE_TYPE (function);
43103   bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
43104   int nregs;
43105 
43106   if (TARGET_64BIT)
43107     {
43108       const int *parm_regs;
43109 
43110       if (ix86_function_type_abi (type) == MS_ABI)
43111         parm_regs = x86_64_ms_abi_int_parameter_registers;
43112       else
43113         parm_regs = x86_64_int_parameter_registers;
43114       return gen_rtx_REG (Pmode, parm_regs[aggr]);
43115     }
43116 
43117   nregs = ix86_function_regparm (type, function);
43118 
43119   if (nregs > 0 && !stdarg_p (type))
43120     {
43121       int regno;
43122       unsigned int ccvt = ix86_get_callcvt (type);
43123 
43124       if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
43125 	regno = aggr ? DX_REG : CX_REG;
43126       else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
43127         {
43128 	  regno = CX_REG;
43129 	  if (aggr)
43130 	    return gen_rtx_MEM (SImode,
43131 				plus_constant (Pmode, stack_pointer_rtx, 4));
43132 	}
43133       else
43134         {
43135 	  regno = AX_REG;
43136 	  if (aggr)
43137 	    {
43138 	      regno = DX_REG;
43139 	      if (nregs == 1)
43140 		return gen_rtx_MEM (SImode,
43141 				    plus_constant (Pmode,
43142 						   stack_pointer_rtx, 4));
43143 	    }
43144 	}
43145       return gen_rtx_REG (SImode, regno);
43146     }
43147 
43148   return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
43149 					     aggr ? 8 : 4));
43150 }
43151 
43152 /* Determine whether x86_output_mi_thunk can succeed.  */
43153 
43154 static bool
43155 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
43156 			 const_tree function)
43157 {
43158   /* 64-bit can handle anything.  */
43159   if (TARGET_64BIT)
43160     return true;
43161 
43162   /* For 32-bit, everything's fine if we have one free register.  */
43163   if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
43164     return true;
43165 
43166   /* Need a free register for vcall_offset.  */
43167   if (vcall_offset)
43168     return false;
43169 
43170   /* Need a free register for GOT references.  */
43171   if (flag_pic && !targetm.binds_local_p (function))
43172     return false;
43173 
43174   /* Otherwise ok.  */
43175   return true;
43176 }
43177 
43178 /* Output the assembler code for a thunk function.  THUNK_DECL is the
43179    declaration for the thunk function itself, FUNCTION is the decl for
43180    the target function.  DELTA is an immediate constant offset to be
43181    added to THIS.  If VCALL_OFFSET is nonzero, the word at
43182    *(*this + vcall_offset) should be added to THIS.  */
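
/* In rough C-like pseudocode (a sketch, not literally what is emitted),
   the thunk performs:

       this += DELTA;
       if (VCALL_OFFSET != 0)
	 this += *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
       goto FUNCTION;  */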
43183 
43184 static void
43185 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
43186 		     HOST_WIDE_INT vcall_offset, tree function)
43187 {
43188   rtx this_param = x86_this_parameter (function);
43189   rtx this_reg, tmp, fnaddr;
43190   unsigned int tmp_regno;
43191   rtx_insn *insn;
43192 
43193   if (TARGET_64BIT)
43194     tmp_regno = R10_REG;
43195   else
43196     {
43197       unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
43198       if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
43199 	tmp_regno = AX_REG;
43200       else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
43201 	tmp_regno = DX_REG;
43202       else
43203 	tmp_regno = CX_REG;
43204     }
43205 
43206   emit_note (NOTE_INSN_PROLOGUE_END);
43207 
43208   /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
43209      pull it in now and let DELTA benefit.  */
43210   if (REG_P (this_param))
43211     this_reg = this_param;
43212   else if (vcall_offset)
43213     {
43214       /* Put the this parameter into %eax.  */
43215       this_reg = gen_rtx_REG (Pmode, AX_REG);
43216       emit_move_insn (this_reg, this_param);
43217     }
43218   else
43219     this_reg = NULL_RTX;
43220 
43221   /* Adjust the this parameter by a fixed constant.  */
43222   if (delta)
43223     {
43224       rtx delta_rtx = GEN_INT (delta);
43225       rtx delta_dst = this_reg ? this_reg : this_param;
43226 
43227       if (TARGET_64BIT)
43228 	{
43229 	  if (!x86_64_general_operand (delta_rtx, Pmode))
43230 	    {
43231 	      tmp = gen_rtx_REG (Pmode, tmp_regno);
43232 	      emit_move_insn (tmp, delta_rtx);
43233 	      delta_rtx = tmp;
43234 	    }
43235 	}
43236 
43237       ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
43238     }
43239 
43240   /* Adjust the this parameter by a value stored in the vtable.  */
43241   if (vcall_offset)
43242     {
43243       rtx vcall_addr, vcall_mem, this_mem;
43244 
43245       tmp = gen_rtx_REG (Pmode, tmp_regno);
43246 
43247       this_mem = gen_rtx_MEM (ptr_mode, this_reg);
43248       if (Pmode != ptr_mode)
43249 	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
43250       emit_move_insn (tmp, this_mem);
43251 
43252       /* Adjust the this parameter.  */
43253       vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
43254       if (TARGET_64BIT
43255 	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
43256 	{
43257 	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
43258 	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
43259 	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
43260 	}
43261 
43262       vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
43263       if (Pmode != ptr_mode)
43264 	emit_insn (gen_addsi_1_zext (this_reg,
43265 				     gen_rtx_REG (ptr_mode,
43266 						  REGNO (this_reg)),
43267 				     vcall_mem));
43268       else
43269 	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
43270     }
43271 
43272   /* If necessary, drop THIS back to its stack slot.  */
43273   if (this_reg && this_reg != this_param)
43274     emit_move_insn (this_param, this_reg);
43275 
43276   fnaddr = XEXP (DECL_RTL (function), 0);
43277   if (TARGET_64BIT)
43278     {
43279       if (!flag_pic || targetm.binds_local_p (function)
43280 	  || TARGET_PECOFF)
43281 	;
43282       else
43283 	{
43284 	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
43285 	  tmp = gen_rtx_CONST (Pmode, tmp);
43286 	  fnaddr = gen_const_mem (Pmode, tmp);
43287 	}
43288     }
43289   else
43290     {
43291       if (!flag_pic || targetm.binds_local_p (function))
43292 	;
43293 #if TARGET_MACHO
43294       else if (TARGET_MACHO)
43295 	{
43296 	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
43297 	  fnaddr = XEXP (fnaddr, 0);
43298 	}
43299 #endif /* TARGET_MACHO */
43300       else
43301 	{
43302 	  tmp = gen_rtx_REG (Pmode, CX_REG);
43303 	  output_set_got (tmp, NULL_RTX);
43304 
43305 	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
43306 	  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
43307 	  fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
43308 	  fnaddr = gen_const_mem (Pmode, fnaddr);
43309 	}
43310     }
43311 
43312   /* Our sibling call patterns do not allow memories, because we have no
43313      predicate that can distinguish between frame and non-frame memory.
43314      For our purposes here, we can get away with (ab)using a jump pattern,
43315      because we're going to do no optimization.  */
43316   if (MEM_P (fnaddr))
43317     {
43318       if (sibcall_insn_operand (fnaddr, word_mode))
43319 	{
43320 	  fnaddr = XEXP (DECL_RTL (function), 0);
43321 	  tmp = gen_rtx_MEM (QImode, fnaddr);
43322 	  tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43323 	  tmp = emit_call_insn (tmp);
43324 	  SIBLING_CALL_P (tmp) = 1;
43325 	}
43326       else
43327 	emit_jump_insn (gen_indirect_jump (fnaddr));
43328     }
43329   else
43330     {
43331       if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
43332 	{
43333 	  /* CM_LARGE_PIC always uses a pseudo PIC register, which is
43334 	     uninitialized.  Since FUNCTION is local and calling it
43335 	     doesn't go through the PLT, we use scratch register %r11
43336 	     as the PIC register and initialize it here.  */
43337 	  pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
43338 	  ix86_init_large_pic_reg (tmp_regno);
43339 	  fnaddr = legitimize_pic_address (fnaddr,
43340 					   gen_rtx_REG (Pmode, tmp_regno));
43341 	}
43342 
43343       if (!sibcall_insn_operand (fnaddr, word_mode))
43344 	{
43345 	  tmp = gen_rtx_REG (word_mode, tmp_regno);
43346 	  if (GET_MODE (fnaddr) != word_mode)
43347 	    fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
43348 	  emit_move_insn (tmp, fnaddr);
43349 	  fnaddr = tmp;
43350 	}
43351 
43352       tmp = gen_rtx_MEM (QImode, fnaddr);
43353       tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43354       tmp = emit_call_insn (tmp);
43355       SIBLING_CALL_P (tmp) = 1;
43356     }
43357   emit_barrier ();
43358 
43359   /* Emit just enough of rest_of_compilation to get the insns emitted.
43360      Note that use_thunk calls assemble_start_function et al.  */
43361   insn = get_insns ();
43362   shorten_branches (insn);
43363   final_start_function (insn, file, 1);
43364   final (insn, file, 1);
43365   final_end_function ();
43366 }
43367 
43368 static void
43369 x86_file_start (void)
43370 {
43371   default_file_start ();
43372   if (TARGET_16BIT)
43373     fputs ("\t.code16gcc\n", asm_out_file);
43374 #if TARGET_MACHO
43375   darwin_file_start ();
43376 #endif
43377   if (X86_FILE_START_VERSION_DIRECTIVE)
43378     fputs ("\t.version\t\"01.01\"\n", asm_out_file);
43379   if (X86_FILE_START_FLTUSED)
43380     fputs ("\t.global\t__fltused\n", asm_out_file);
43381   if (ix86_asm_dialect == ASM_INTEL)
43382     fputs ("\t.intel_syntax noprefix\n", asm_out_file);
43383 }
43384 
43385 int
43386 x86_field_alignment (tree field, int computed)
43387 {
43388   machine_mode mode;
43389   tree type = TREE_TYPE (field);
43390 
43391   if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
43392     return computed;
43393   mode = TYPE_MODE (strip_array_types (type));
43394   if (mode == DFmode || mode == DCmode
43395       || GET_MODE_CLASS (mode) == MODE_INT
43396       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
43397     return MIN (32, computed);
43398   return computed;
43399 }
43400 
43401 /* Print call to TARGET to FILE.  */
43402 
43403 static void
43404 x86_print_call_or_nop (FILE *file, const char *target)
43405 {
43406   if (flag_nop_mcount)
43407     fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop.  */
43408   else
43409     fprintf (file, "1:\tcall\t%s\n", target);
43410 }
43411 
43412 /* Output assembler code to FILE to increment profiler label # LABELNO
43413    for profiling a function entry.  */
43414 void
43415 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
43416 {
43417   const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
43418 					 : MCOUNT_NAME);
43419   if (TARGET_64BIT)
43420     {
43421 #ifndef NO_PROFILE_COUNTERS
43422       fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
43423 #endif
43424 
43425       if (!TARGET_PECOFF && flag_pic)
43426 	fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
43427       else
43428 	x86_print_call_or_nop (file, mcount_name);
43429     }
43430   else if (flag_pic)
43431     {
43432 #ifndef NO_PROFILE_COUNTERS
43433       fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
43434 	       LPREFIX, labelno);
43435 #endif
43436       fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
43437     }
43438   else
43439     {
43440 #ifndef NO_PROFILE_COUNTERS
43441       fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
43442 	       LPREFIX, labelno);
43443 #endif
43444       x86_print_call_or_nop (file, mcount_name);
43445     }
43446 
43447   if (flag_record_mcount)
43448     {
43449       fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
43450       fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
43451       fprintf (file, "\t.previous\n");
43452     }
43453 }
43454 
43455 /* We don't have exact information about insn sizes, but we may quite
43456    safely assume that we are informed about all 1-byte insns and memory
43457    address sizes.  This is enough to eliminate unnecessary padding in
43458    99% of cases.  */
43459 
43460 static int
43461 min_insn_size (rtx_insn *insn)
43462 {
43463   int l = 0, len;
43464 
43465   if (!INSN_P (insn) || !active_insn_p (insn))
43466     return 0;
43467 
43468   /* Discard alignments we've emitted and jump instructions.  */
43469   if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43470       && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43471     return 0;
43472 
43473   /* Important case: calls are always 5 bytes.
43474      It is common to have many calls in a row.  */
43475   if (CALL_P (insn)
43476       && symbolic_reference_mentioned_p (PATTERN (insn))
43477       && !SIBLING_CALL_P (insn))
43478     return 5;
43479   len = get_attr_length (insn);
43480   if (len <= 1)
43481     return 1;
43482 
43483   /* For normal instructions we rely on get_attr_length being exact,
43484      with a few exceptions.  */
43485   if (!JUMP_P (insn))
43486     {
43487       enum attr_type type = get_attr_type (insn);
43488 
43489       switch (type)
43490 	{
43491 	case TYPE_MULTI:
43492 	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43493 	      || asm_noperands (PATTERN (insn)) >= 0)
43494 	    return 0;
43495 	  break;
43496 	case TYPE_OTHER:
43497 	case TYPE_FCMP:
43498 	  break;
43499 	default:
43500 	  /* Otherwise trust get_attr_length.  */
43501 	  return len;
43502 	}
43503 
43504       l = get_attr_length_address (insn);
43505       if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43506 	l = 4;
43507     }
43508   if (l)
43509     return 1+l;
43510   else
43511     return 2;
43512 }
43513 
43514 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43515 
43516 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
43517    window.  */
43518 
43519 static void
43520 ix86_avoid_jump_mispredicts (void)
43521 {
43522   rtx_insn *insn, *start = get_insns ();
43523   int nbytes = 0, njumps = 0;
43524   bool isjump = false;
43525 
43526   /* Look for all minimal intervals of instructions containing 4 jumps.
43527      The intervals are bounded by START and INSN.  NBYTES is the total
43528      size of the instructions in the interval, including INSN but not
43529      START.  When NBYTES is smaller than 16, the ends of START and INSN
43530      may land in the same 16-byte window.
43531 
43532      The smallest offset in the window at which INSN can start occurs
43533      when START ends at offset 0; INSN then starts at offset
43534      NBYTES - sizeof (INSN), so we add a p2align to the 16-byte window
43535      with maxskip 15 - NBYTES + sizeof (INSN).
43536 
43537      Don't consider an asm goto as a jump: control can reach its label(s)
43538      by other means, and we estimate all asm stmts' minimum length as 0.  */
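  /* Illustrative (made-up sizes): if four jumps end up in an interval of
     nbytes == 15 and INSN itself is 2 bytes, we pad by 15 - 15 + 2 == 2
     bytes so that no 16-byte window can contain all four jumps.  */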
43539   for (insn = start; insn; insn = NEXT_INSN (insn))
43540     {
43541       int min_size;
43542 
43543       if (LABEL_P (insn))
43544 	{
43545 	  int align = label_to_alignment (insn);
43546 	  int max_skip = label_to_max_skip (insn);
43547 
43548 	  if (max_skip > 15)
43549 	    max_skip = 15;
43550 	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43551 	     already in the current 16 byte page, because otherwise
43552 	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43553 	     bytes to reach 16 byte boundary.  */
43554 	  if (align <= 0
43555 	      || (align <= 3 && max_skip != (1 << align) - 1))
43556 	    max_skip = 0;
43557 	  if (dump_file)
43558 	    fprintf (dump_file, "Label %i with max_skip %i\n",
43559 		     INSN_UID (insn), max_skip);
43560 	  if (max_skip)
43561 	    {
43562 	      while (nbytes + max_skip >= 16)
43563 		{
43564 		  start = NEXT_INSN (start);
43565 		  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43566 		      || CALL_P (start))
43567 		    njumps--, isjump = true;
43568 		  else
43569 		    isjump = false;
43570 		  nbytes -= min_insn_size (start);
43571 		}
43572 	    }
43573 	  continue;
43574 	}
43575 
43576       min_size = min_insn_size (insn);
43577       nbytes += min_size;
43578       if (dump_file)
43579 	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43580 		 INSN_UID (insn), min_size);
43581       if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
43582 	  || CALL_P (insn))
43583 	njumps++;
43584       else
43585 	continue;
43586 
43587       while (njumps > 3)
43588 	{
43589 	  start = NEXT_INSN (start);
43590 	  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43591 	      || CALL_P (start))
43592 	    njumps--, isjump = true;
43593 	  else
43594 	    isjump = false;
43595 	  nbytes -= min_insn_size (start);
43596 	}
43597       gcc_assert (njumps >= 0);
43598       if (dump_file)
43599         fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43600 		 INSN_UID (start), INSN_UID (insn), nbytes);
43601 
43602       if (njumps == 3 && isjump && nbytes < 16)
43603 	{
43604 	  int padsize = 15 - nbytes + min_insn_size (insn);
43605 
43606 	  if (dump_file)
43607 	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43608 		     INSN_UID (insn), padsize);
43609           emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43610 	}
43611     }
43612 }
43613 #endif
43614 
43615 /* AMD Athlon works faster
43616    when RET is not the destination of a conditional jump or directly
43617    preceded by another jump instruction.  We avoid the penalty by
43618    inserting a NOP just before such RET instructions.  */
43619 static void
43620 ix86_pad_returns (void)
43621 {
43622   edge e;
43623   edge_iterator ei;
43624 
43625   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43626     {
43627       basic_block bb = e->src;
43628       rtx_insn *ret = BB_END (bb);
43629       rtx_insn *prev;
43630       bool replace = false;
43631 
43632       if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43633 	  || optimize_bb_for_size_p (bb))
43634 	continue;
43635       for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43636 	if (active_insn_p (prev) || LABEL_P (prev))
43637 	  break;
43638       if (prev && LABEL_P (prev))
43639 	{
43640 	  edge e;
43641 	  edge_iterator ei;
43642 
43643 	  FOR_EACH_EDGE (e, ei, bb->preds)
43644 	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
43645 		&& !(e->flags & EDGE_FALLTHRU))
43646 	      {
43647 		replace = true;
43648 		break;
43649 	      }
43650 	}
43651       if (!replace)
43652 	{
43653 	  prev = prev_active_insn (ret);
43654 	  if (prev
43655 	      && ((JUMP_P (prev) && any_condjump_p (prev))
43656 		  || CALL_P (prev)))
43657 	    replace = true;
43658 	  /* Empty functions get a branch mispredict even when
43659 	     the jump destination is not visible to us.  */
43660 	  if (!prev && !optimize_function_for_size_p (cfun))
43661 	    replace = true;
43662 	}
43663       if (replace)
43664 	{
43665 	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43666 	  delete_insn (ret);
43667 	}
43668     }
43669 }
43670 
43671 /* Count the minimum number of instructions in BB.  Return 4 if the
43672    number of instructions >= 4.  */
43673 
43674 static int
43675 ix86_count_insn_bb (basic_block bb)
43676 {
43677   rtx_insn *insn;
43678   int insn_count = 0;
43679 
43680   /* Count number of instructions in this block.  Return 4 if the number
43681      of instructions >= 4.  */
43682   FOR_BB_INSNS (bb, insn)
43683     {
43684       /* This only happens in exit blocks.  */
43685       if (JUMP_P (insn)
43686 	  && ANY_RETURN_P (PATTERN (insn)))
43687 	break;
43688 
43689       if (NONDEBUG_INSN_P (insn)
43690 	  && GET_CODE (PATTERN (insn)) != USE
43691 	  && GET_CODE (PATTERN (insn)) != CLOBBER)
43692 	{
43693 	  insn_count++;
43694 	  if (insn_count >= 4)
43695 	    return insn_count;
43696 	}
43697     }
43698 
43699   return insn_count;
43700 }
43701 
43702 
43703 /* Count the minimum number of instructions in code path in BB.
43704    Return 4 if the number of instructions >= 4.  */
43705 
43706 static int
43707 ix86_count_insn (basic_block bb)
43708 {
43709   edge e;
43710   edge_iterator ei;
43711   int min_prev_count;
43712 
43713   /* Only bother counting instructions along paths with no
43714      more than 2 basic blocks between entry and exit.  Given
43715      that BB has an edge to exit, determine if a predecessor
43716      of BB has an edge from entry.  If so, compute the number
43717      of instructions in the predecessor block.  If there
43718      happen to be multiple such blocks, compute the minimum.  */
43719   min_prev_count = 4;
43720   FOR_EACH_EDGE (e, ei, bb->preds)
43721     {
43722       edge prev_e;
43723       edge_iterator prev_ei;
43724 
43725       if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43726 	{
43727 	  min_prev_count = 0;
43728 	  break;
43729 	}
43730       FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43731 	{
43732 	  if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43733 	    {
43734 	      int count = ix86_count_insn_bb (e->src);
43735 	      if (count < min_prev_count)
43736 		min_prev_count = count;
43737 	      break;
43738 	    }
43739 	}
43740     }
43741 
43742   if (min_prev_count < 4)
43743     min_prev_count += ix86_count_insn_bb (bb);
43744 
43745   return min_prev_count;
43746 }
43747 
43748 /* Pad short functions to 4 instructions.  */
43749 
43750 static void
43751 ix86_pad_short_function (void)
43752 {
43753   edge e;
43754   edge_iterator ei;
43755 
43756   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43757     {
43758       rtx_insn *ret = BB_END (e->src);
43759       if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43760 	{
43761 	  int insn_count = ix86_count_insn (e->src);
43762 
43763 	  /* Pad short function.  */
43764 	  if (insn_count < 4)
43765 	    {
43766 	      rtx_insn *insn = ret;
43767 
43768 	      /* Find epilogue.  */
43769 	      while (insn
43770 		     && (!NOTE_P (insn)
43771 			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43772 		insn = PREV_INSN (insn);
43773 
43774 	      if (!insn)
43775 		insn = ret;
43776 
43777 	      /* Two NOPs count as one instruction.  */
43778 	      insn_count = 2 * (4 - insn_count);
43779 	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43780 	    }
43781 	}
43782     }
43783 }
43784 
43785 /* Fix up a Windows system unwinder issue.  If an EH region falls through into
43786    the epilogue, the Windows system unwinder will apply epilogue logic and
43787    produce incorrect offsets.  This can be avoided by adding a nop between
43788    the last insn that can throw and the first insn of the epilogue.  */
43789 
43790 static void
43791 ix86_seh_fixup_eh_fallthru (void)
43792 {
43793   edge e;
43794   edge_iterator ei;
43795 
43796   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43797     {
43798       rtx_insn *insn, *next;
43799 
43800       /* Find the beginning of the epilogue.  */
43801       for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43802 	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43803 	  break;
43804       if (insn == NULL)
43805 	continue;
43806 
43807       /* We only care about preceding insns that can throw.  */
43808       insn = prev_active_insn (insn);
43809       if (insn == NULL || !can_throw_internal (insn))
43810 	continue;
43811 
43812       /* Do not separate calls from their debug information.  */
43813       for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43814 	if (NOTE_P (next)
43815             && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43816                 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43817 	  insn = next;
43818 	else
43819 	  break;
43820 
43821       emit_insn_after (gen_nops (const1_rtx), insn);
43822     }
43823 }
43824 
43825 /* Implement machine specific optimizations.  We implement padding of returns
43826    for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window.  */
43827 static void
43828 ix86_reorg (void)
43829 {
43830   /* We are freeing block_for_insn in the toplev to keep compatibility
43831      with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
43832   compute_bb_for_insn ();
43833 
43834   if (TARGET_SEH && current_function_has_exception_handlers ())
43835     ix86_seh_fixup_eh_fallthru ();
43836 
43837   if (optimize && optimize_function_for_speed_p (cfun))
43838     {
43839       if (TARGET_PAD_SHORT_FUNCTION)
43840 	ix86_pad_short_function ();
43841       else if (TARGET_PAD_RETURNS)
43842 	ix86_pad_returns ();
43843 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43844       if (TARGET_FOUR_JUMP_LIMIT)
43845 	ix86_avoid_jump_mispredicts ();
43846 #endif
43847     }
43848 }
43849 
43850 /* Return nonzero when a QImode register that must be represented via a
43851    REX prefix is used.  */
43852 bool
43853 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43854 {
43855   int i;
43856   extract_insn_cached (insn);
43857   for (i = 0; i < recog_data.n_operands; i++)
43858     if (GENERAL_REG_P (recog_data.operand[i])
43859 	&& !QI_REGNO_P (REGNO (recog_data.operand[i])))
43860        return true;
43861   return false;
43862 }
43863 
43864 /* Return true when INSN mentions a register that must be encoded using
43865    a REX prefix.  */
43866 bool
43867 x86_extended_reg_mentioned_p (rtx insn)
43868 {
43869   subrtx_iterator::array_type array;
43870   FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43871     {
43872       const_rtx x = *iter;
43873       if (REG_P (x)
43874 	  && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43875 	return true;
43876     }
43877   return false;
43878 }
43879 
43880 /* If profitable, negate (without causing overflow) the integer constant
43881    of mode MODE at location LOC.  Return true if we did.  */
43882 bool
43883 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43884 {
43885   HOST_WIDE_INT val;
43886 
43887   if (!CONST_INT_P (*loc))
43888     return false;
43889 
43890   switch (mode)
43891     {
43892     case DImode:
43893       /* DImode x86_64 constants must fit in 32 bits.  */
43894       gcc_assert (x86_64_immediate_operand (*loc, mode));
43895 
43896       mode = SImode;
43897       break;
43898 
43899     case SImode:
43900     case HImode:
43901     case QImode:
43902       break;
43903 
43904     default:
43905       gcc_unreachable ();
43906     }
43907 
43908   /* Avoid overflows.  */
43909   if (mode_signbit_p (mode, *loc))
43910     return false;
43911 
43912   val = INTVAL (*loc);
43913 
43914   /* Make things pretty: emit `subl $4,%eax' rather than `addl $-4,%eax'.
43915      Exception: -128 has a smaller encoding than 128, so swap sign and op.  */
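  /* E.g. `addl $128, %eax' needs a 32-bit immediate, while the equivalent
     `subl $-128, %eax' fits in a sign-extended 8-bit immediate.  */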
43916   if ((val < 0 && val != -128)
43917       || val == 128)
43918     {
43919       *loc = GEN_INT (-val);
43920       return true;
43921     }
43922 
43923   return false;
43924 }
43925 
43926 /* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
43927    optabs would emit if we didn't have TFmode patterns.  */
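
/* A sketch in C of what is emitted below, for a DImode (uint64_t) input
   and a double result; the SImode case is analogous:

       if ((int64_t) u >= 0)
	 d = (double) (int64_t) u;
       else
	 {
	   uint64_t half = (u >> 1) | (u & 1);  // halve, keep the low bit
	   d = (double) (int64_t) half;         // convert as signed
	   d += d;                              // double the result
	 }
   */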
43928 
43929 void
43930 x86_emit_floatuns (rtx operands[2])
43931 {
43932   rtx_code_label *neglab, *donelab;
43933   rtx i0, i1, f0, in, out;
43934   machine_mode mode, inmode;
43935 
43936   inmode = GET_MODE (operands[1]);
43937   gcc_assert (inmode == SImode || inmode == DImode);
43938 
43939   out = operands[0];
43940   in = force_reg (inmode, operands[1]);
43941   mode = GET_MODE (out);
43942   neglab = gen_label_rtx ();
43943   donelab = gen_label_rtx ();
43944   f0 = gen_reg_rtx (mode);
43945 
43946   emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43947 
43948   expand_float (out, in, 0);
43949 
43950   emit_jump_insn (gen_jump (donelab));
43951   emit_barrier ();
43952 
43953   emit_label (neglab);
43954 
43955   i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43956 			    1, OPTAB_DIRECT);
43957   i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43958 			    1, OPTAB_DIRECT);
43959   i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43960 
43961   expand_float (f0, i0, 0);
43962 
43963   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
43964 
43965   emit_label (donelab);
43966 }
43967 
43968 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43969 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43970 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43971 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43972 
43973 /* Get a vector mode of the same size as the original but with elements
43974    twice as wide.  This is only guaranteed to apply to integral vectors.  */
43975 
43976 static inline machine_mode
43977 get_mode_wider_vector (machine_mode o)
43978 {
43979   /* ??? Rely on the ordering that genmodes.c gives to vectors.  */
43980   machine_mode n = GET_MODE_WIDER_MODE (o);
43981   gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43982   gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43983   return n;
43984 }
43985 
43986 /* A subroutine of ix86_expand_vector_init_duplicate.  Tries to
43987    fill target with val via vec_duplicate.  */
43988 
43989 static bool
43990 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43991 {
43992   bool ok;
43993   rtx_insn *insn;
43994   rtx dup;
43995 
43996   /* First attempt to recognize VAL as-is.  */
43997   dup = gen_rtx_VEC_DUPLICATE (mode, val);
43998   insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
43999   if (recog_memoized (insn) < 0)
44000     {
44001       /* If that fails, force VAL into a register.  */
44002       rtx_insn *seq;
44003 
44004       start_sequence ();
44005       XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
44006       seq = get_insns ();
44007       end_sequence ();
44008       if (seq)
44009 	emit_insn_before (seq, insn);
44010 
44011       ok = recog_memoized (insn) >= 0;
44012       gcc_assert (ok);
44013     }
44014   return true;
44015 }
44016 
44017 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
44018    with all elements equal to VAR.  Return true if successful.  */
44019 
44020 static bool
44021 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
44022 				   rtx target, rtx val)
44023 {
44024   bool ok;
44025 
44026   switch (mode)
44027     {
44028     case V2SImode:
44029     case V2SFmode:
44030       if (!mmx_ok)
44031 	return false;
44032       /* FALLTHRU */
44033 
44034     case V4DFmode:
44035     case V4DImode:
44036     case V8SFmode:
44037     case V8SImode:
44038     case V2DFmode:
44039     case V2DImode:
44040     case V4SFmode:
44041     case V4SImode:
44042     case V16SImode:
44043     case V8DImode:
44044     case V16SFmode:
44045     case V8DFmode:
44046       return ix86_vector_duplicate_value (mode, target, val);
44047 
44048     case V4HImode:
44049       if (!mmx_ok)
44050 	return false;
44051       if (TARGET_SSE || TARGET_3DNOW_A)
44052 	{
44053 	  rtx x;
44054 
44055 	  val = gen_lowpart (SImode, val);
44056 	  x = gen_rtx_TRUNCATE (HImode, val);
44057 	  x = gen_rtx_VEC_DUPLICATE (mode, x);
44058 	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
44059 	  return true;
44060 	}
44061       goto widen;
44062 
44063     case V8QImode:
44064       if (!mmx_ok)
44065 	return false;
44066       goto widen;
44067 
44068     case V8HImode:
44069       if (TARGET_AVX2)
44070 	return ix86_vector_duplicate_value (mode, target, val);
44071 
44072       if (TARGET_SSE2)
44073 	{
44074 	  struct expand_vec_perm_d dperm;
44075 	  rtx tmp1, tmp2;
44076 
44077 	permute:
44078 	  memset (&dperm, 0, sizeof (dperm));
44079 	  dperm.target = target;
44080 	  dperm.vmode = mode;
44081 	  dperm.nelt = GET_MODE_NUNITS (mode);
44082 	  dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
44083 	  dperm.one_operand_p = true;
44084 
44085 	  /* Extend to SImode using a paradoxical SUBREG.  */
44086 	  tmp1 = gen_reg_rtx (SImode);
44087 	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
44088 
44089 	  /* Insert the SImode value as low element of a V4SImode vector. */
44090 	  tmp2 = gen_reg_rtx (V4SImode);
44091 	  emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
44092 	  emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
44093 
44094 	  ok = (expand_vec_perm_1 (&dperm)
44095 		|| expand_vec_perm_broadcast_1 (&dperm));
44096 	  gcc_assert (ok);
44097 	  return ok;
44098 	}
44099       goto widen;
44100 
44101     case V16QImode:
44102       if (TARGET_AVX2)
44103 	return ix86_vector_duplicate_value (mode, target, val);
44104 
44105       if (TARGET_SSE2)
44106 	goto permute;
44107       goto widen;
44108 
44109     widen:
44110       /* Replicate the value once into the next wider mode and recurse.  */
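      /* E.g. a V8QImode broadcast of V becomes a V4HImode broadcast of
	 (V << 8) | V; the resulting byte pattern is identical.  */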
44111       {
44112 	machine_mode smode, wsmode, wvmode;
44113 	rtx x;
44114 
44115 	smode = GET_MODE_INNER (mode);
44116 	wvmode = get_mode_wider_vector (mode);
44117 	wsmode = GET_MODE_INNER (wvmode);
44118 
44119 	val = convert_modes (wsmode, smode, val, true);
44120 	x = expand_simple_binop (wsmode, ASHIFT, val,
44121 				 GEN_INT (GET_MODE_BITSIZE (smode)),
44122 				 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44123 	val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
44124 
44125 	x = gen_reg_rtx (wvmode);
44126 	ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
44127 	gcc_assert (ok);
44128 	emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
44129 	return ok;
44130       }
44131 
44132     case V16HImode:
44133     case V32QImode:
44134       if (TARGET_AVX2)
44135 	return ix86_vector_duplicate_value (mode, target, val);
44136       else
44137 	{
44138 	  machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
44139 	  rtx x = gen_reg_rtx (hvmode);
44140 
44141 	  ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
44142 	  gcc_assert (ok);
44143 
44144 	  x = gen_rtx_VEC_CONCAT (mode, x, x);
44145 	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
44146 	}
44147       return true;
44148 
44149     case V64QImode:
44150     case V32HImode:
44151       if (TARGET_AVX512BW)
44152 	return ix86_vector_duplicate_value (mode, target, val);
44153       else
44154 	{
44155 	  machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
44156 	  rtx x = gen_reg_rtx (hvmode);
44157 
44158 	  ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
44159 	  gcc_assert (ok);
44160 
44161 	  x = gen_rtx_VEC_CONCAT (mode, x, x);
44162 	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
44163 	}
44164       return true;
44165 
44166     default:
44167       return false;
44168     }
44169 }
44170 
44171 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
44172    whose ONE_VAR element is VAR, and other elements are zero.  Return true
44173    if successful.  */
44174 
44175 static bool
44176 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
44177 				     rtx target, rtx var, int one_var)
44178 {
44179   machine_mode vsimode;
44180   rtx new_target;
44181   rtx x, tmp;
44182   bool use_vector_set = false;
44183 
44184   switch (mode)
44185     {
44186     case V2DImode:
44187       /* For SSE4.1, we normally use vector set.  But if the second
44188 	 element is zero and inter-unit moves are OK, we use movq
44189 	 instead.  */
44190       use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
44191 			&& !(TARGET_INTER_UNIT_MOVES_TO_VEC
44192 			     && one_var == 0));
44193       break;
44194     case V16QImode:
44195     case V4SImode:
44196     case V4SFmode:
44197       use_vector_set = TARGET_SSE4_1;
44198       break;
44199     case V8HImode:
44200       use_vector_set = TARGET_SSE2;
44201       break;
44202     case V4HImode:
44203       use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
44204       break;
44205     case V32QImode:
44206     case V16HImode:
44207     case V8SImode:
44208     case V8SFmode:
44209     case V4DFmode:
44210       use_vector_set = TARGET_AVX;
44211       break;
44212     case V4DImode:
44213       /* Use ix86_expand_vector_set in 64bit mode only.  */
44214       use_vector_set = TARGET_AVX && TARGET_64BIT;
44215       break;
44216     default:
44217       break;
44218     }
44219 
44220   if (use_vector_set)
44221     {
44222       emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
44223       var = force_reg (GET_MODE_INNER (mode), var);
44224       ix86_expand_vector_set (mmx_ok, target, var, one_var);
44225       return true;
44226     }
44227 
44228   switch (mode)
44229     {
44230     case V2SFmode:
44231     case V2SImode:
44232       if (!mmx_ok)
44233 	return false;
44234       /* FALLTHRU */
44235 
44236     case V2DFmode:
44237     case V2DImode:
44238       if (one_var != 0)
44239 	return false;
44240       var = force_reg (GET_MODE_INNER (mode), var);
44241       x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
44242       emit_insn (gen_rtx_SET (VOIDmode, target, x));
44243       return true;
44244 
44245     case V4SFmode:
44246     case V4SImode:
44247       if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
44248 	new_target = gen_reg_rtx (mode);
44249       else
44250 	new_target = target;
44251       var = force_reg (GET_MODE_INNER (mode), var);
44252       x = gen_rtx_VEC_DUPLICATE (mode, var);
44253       x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
44254       emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
44255       if (one_var != 0)
44256 	{
44257 	  /* We need to shuffle the value to the correct position, so
44258 	     create a new pseudo to store the intermediate result.  */
44259 
44260 	  /* With SSE2, we can use the integer shuffle insns.  */
44261 	  if (mode != V4SFmode && TARGET_SSE2)
44262 	    {
44263 	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
44264 					    const1_rtx,
44265 					    GEN_INT (one_var == 1 ? 0 : 1),
44266 					    GEN_INT (one_var == 2 ? 0 : 1),
44267 					    GEN_INT (one_var == 3 ? 0 : 1)));
44268 	      if (target != new_target)
44269 		emit_move_insn (target, new_target);
44270 	      return true;
44271 	    }
44272 
44273 	  /* Otherwise convert the intermediate result to V4SFmode and
44274 	     use the SSE1 shuffle instructions.  */
44275 	  if (mode != V4SFmode)
44276 	    {
44277 	      tmp = gen_reg_rtx (V4SFmode);
44278 	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
44279 	    }
44280 	  else
44281 	    tmp = new_target;
44282 
44283 	  emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
44284 				       const1_rtx,
44285 				       GEN_INT (one_var == 1 ? 0 : 1),
44286 				       GEN_INT (one_var == 2 ? 0+4 : 1+4),
44287 				       GEN_INT (one_var == 3 ? 0+4 : 1+4)));
44288 
44289 	  if (mode != V4SFmode)
44290 	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
44291 	  else if (tmp != target)
44292 	    emit_move_insn (target, tmp);
44293 	}
44294       else if (target != new_target)
44295 	emit_move_insn (target, new_target);
44296       return true;
44297 
44298     case V8HImode:
44299     case V16QImode:
44300       vsimode = V4SImode;
44301       goto widen;
44302     case V4HImode:
44303     case V8QImode:
44304       if (!mmx_ok)
44305 	return false;
44306       vsimode = V2SImode;
44307       goto widen;
44308     widen:
44309       if (one_var != 0)
44310 	return false;
44311 
44312       /* Zero extend the variable element to SImode and recurse.  */
44313       var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
44314 
44315       x = gen_reg_rtx (vsimode);
44316       if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
44317 						var, one_var))
44318 	gcc_unreachable ();
44319 
44320       emit_move_insn (target, gen_lowpart (mode, x));
44321       return true;
44322 
44323     default:
44324       return false;
44325     }
44326 }
44327 
44328 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
44329    consisting of the values in VALS.  It is known that all elements
44330    except ONE_VAR are constants.  Return true if successful.  */
44331 
44332 static bool
44333 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
44334 				 rtx target, rtx vals, int one_var)
44335 {
44336   rtx var = XVECEXP (vals, 0, one_var);
44337   machine_mode wmode;
44338   rtx const_vec, x;
44339 
44340   const_vec = copy_rtx (vals);
44341   XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
44342   const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
44343 
44344   switch (mode)
44345     {
44346     case V2DFmode:
44347     case V2DImode:
44348     case V2SFmode:
44349     case V2SImode:
44350       /* For the two element vectors, it's just as easy to use
44351 	 the general case.  */
44352       return false;
44353 
44354     case V4DImode:
44355       /* Use ix86_expand_vector_set in 64bit mode only.  */
44356       if (!TARGET_64BIT)
44357 	return false;
44358     case V4DFmode:
44359     case V8SFmode:
44360     case V8SImode:
44361     case V16HImode:
44362     case V32QImode:
44363     case V4SFmode:
44364     case V4SImode:
44365     case V8HImode:
44366     case V4HImode:
44367       break;
44368 
44369     case V16QImode:
44370       if (TARGET_SSE4_1)
44371 	break;
44372       wmode = V8HImode;
44373       goto widen;
44374     case V8QImode:
44375       wmode = V4HImode;
44376       goto widen;
44377     widen:
44378       /* There's no way to set one QImode entry easily.  Combine
44379 	 the variable value with its adjacent constant value, and
44380 	 promote to an HImode set.  */
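      /* For example, with V8QImode, one_var == 2 and constant elements
	 c0..c7, the pair (var, c3) becomes zext (var) | (c3 << 8) in the
	 little-endian layout and is stored at HImode position
	 one_var >> 1 == 1 by the ix86_expand_vector_set call below.  */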
44381       x = XVECEXP (vals, 0, one_var ^ 1);
44382       if (one_var & 1)
44383 	{
44384 	  var = convert_modes (HImode, QImode, var, true);
44385 	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
44386 				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
44387 	  x = GEN_INT (INTVAL (x) & 0xff);
44388 	}
44389       else
44390 	{
44391 	  var = convert_modes (HImode, QImode, var, true);
44392 	  x = gen_int_mode (INTVAL (x) << 8, HImode);
44393 	}
44394       if (x != const0_rtx)
44395 	var = expand_simple_binop (HImode, IOR, var, x, var,
44396 				   1, OPTAB_LIB_WIDEN);
44397 
44398       x = gen_reg_rtx (wmode);
44399       emit_move_insn (x, gen_lowpart (wmode, const_vec));
44400       ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
44401 
44402       emit_move_insn (target, gen_lowpart (mode, x));
44403       return true;
44404 
44405     default:
44406       return false;
44407     }
44408 
44409   emit_move_insn (target, const_vec);
44410   ix86_expand_vector_set (mmx_ok, target, var, one_var);
44411   return true;
44412 }
44413 
44414 /* A subroutine of ix86_expand_vector_init_general.  Use vector
44415    concatenate to handle the most general case: all values variable,
44416    and none identical.  */
44417 
44418 static void
44419 ix86_expand_vector_init_concat (machine_mode mode,
44420 				rtx target, rtx *ops, int n)
44421 {
44422   machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
44423   rtx first[16], second[8], third[4];
44424   rtvec v;
44425   int i, j;
44426 
44427   switch (n)
44428     {
44429     case 2:
44430       switch (mode)
44431 	{
44432 	case V16SImode:
44433 	  cmode = V8SImode;
44434 	  break;
44435 	case V16SFmode:
44436 	  cmode = V8SFmode;
44437 	  break;
44438 	case V8DImode:
44439 	  cmode = V4DImode;
44440 	  break;
44441 	case V8DFmode:
44442 	  cmode = V4DFmode;
44443 	  break;
44444 	case V8SImode:
44445 	  cmode = V4SImode;
44446 	  break;
44447 	case V8SFmode:
44448 	  cmode = V4SFmode;
44449 	  break;
44450 	case V4DImode:
44451 	  cmode = V2DImode;
44452 	  break;
44453 	case V4DFmode:
44454 	  cmode = V2DFmode;
44455 	  break;
44456 	case V4SImode:
44457 	  cmode = V2SImode;
44458 	  break;
44459 	case V4SFmode:
44460 	  cmode = V2SFmode;
44461 	  break;
44462 	case V2DImode:
44463 	  cmode = DImode;
44464 	  break;
44465 	case V2SImode:
44466 	  cmode = SImode;
44467 	  break;
44468 	case V2DFmode:
44469 	  cmode = DFmode;
44470 	  break;
44471 	case V2SFmode:
44472 	  cmode = SFmode;
44473 	  break;
44474 	default:
44475 	  gcc_unreachable ();
44476 	}
44477 
44478       if (!register_operand (ops[1], cmode))
44479 	ops[1] = force_reg (cmode, ops[1]);
44480       if (!register_operand (ops[0], cmode))
44481 	ops[0] = force_reg (cmode, ops[0]);
44482       emit_insn (gen_rtx_SET (VOIDmode, target,
44483 			      gen_rtx_VEC_CONCAT (mode, ops[0],
44484 						  ops[1])));
44485       break;
44486 
44487     case 4:
44488       switch (mode)
44489 	{
44490 	case V4DImode:
44491 	  cmode = V2DImode;
44492 	  break;
44493 	case V4DFmode:
44494 	  cmode = V2DFmode;
44495 	  break;
44496 	case V4SImode:
44497 	  cmode = V2SImode;
44498 	  break;
44499 	case V4SFmode:
44500 	  cmode = V2SFmode;
44501 	  break;
44502 	default:
44503 	  gcc_unreachable ();
44504 	}
44505       goto half;
44506 
44507     case 8:
44508       switch (mode)
44509 	{
44510 	case V8DImode:
44511 	  cmode = V2DImode;
44512 	  hmode = V4DImode;
44513 	  break;
44514 	case V8DFmode:
44515 	  cmode = V2DFmode;
44516 	  hmode = V4DFmode;
44517 	  break;
44518 	case V8SImode:
44519 	  cmode = V2SImode;
44520 	  hmode = V4SImode;
44521 	  break;
44522 	case V8SFmode:
44523 	  cmode = V2SFmode;
44524 	  hmode = V4SFmode;
44525 	  break;
44526 	default:
44527 	  gcc_unreachable ();
44528 	}
44529       goto half;
44530 
44531     case 16:
44532       switch (mode)
44533 	{
44534 	case V16SImode:
44535 	  cmode = V2SImode;
44536 	  hmode = V4SImode;
44537 	  gmode = V8SImode;
44538 	  break;
44539 	case V16SFmode:
44540 	  cmode = V2SFmode;
44541 	  hmode = V4SFmode;
44542 	  gmode = V8SFmode;
44543 	  break;
44544 	default:
44545 	  gcc_unreachable ();
44546 	}
44547       goto half;
44548 
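      /* Each pass below halves the element count by concatenating
	 adjacent pairs: e.g. a V8SFmode build from eight scalars first
	 forms four V2SFmode pairs, then two V4SFmode halves, and finally
	 the single V8SFmode concatenation.  */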
44549 half:
44550       /* FIXME: We process inputs backward to help RA.  PR 36222.  */
44551       i = n - 1;
44552       j = (n >> 1) - 1;
44553       for (; i > 0; i -= 2, j--)
44554 	{
44555 	  first[j] = gen_reg_rtx (cmode);
44556 	  v = gen_rtvec (2, ops[i - 1], ops[i]);
44557 	  ix86_expand_vector_init (false, first[j],
44558 				   gen_rtx_PARALLEL (cmode, v));
44559 	}
44560 
44561       n >>= 1;
44562       if (n > 4)
44563 	{
44564 	  gcc_assert (hmode != VOIDmode);
44565 	  gcc_assert (gmode != VOIDmode);
44566 	  for (i = j = 0; i < n; i += 2, j++)
44567 	    {
44568 	      second[j] = gen_reg_rtx (hmode);
44569 	      ix86_expand_vector_init_concat (hmode, second [j],
44570 					      &first [i], 2);
44571 	    }
44572 	  n >>= 1;
44573 	  for (i = j = 0; i < n; i += 2, j++)
44574 	    {
44575 	      third[j] = gen_reg_rtx (gmode);
44576 	      ix86_expand_vector_init_concat (gmode, third[j],
44577 					      &second[i], 2);
44578 	    }
44579 	  n >>= 1;
44580 	  ix86_expand_vector_init_concat (mode, target, third, n);
44581 	}
44582       else if (n > 2)
44583 	{
44584 	  gcc_assert (hmode != VOIDmode);
44585 	  for (i = j = 0; i < n; i += 2, j++)
44586 	    {
44587 	      second[j] = gen_reg_rtx (hmode);
44588 	      ix86_expand_vector_init_concat (hmode, second [j],
44589 					      &first [i], 2);
44590 	    }
44591 	  n >>= 1;
44592 	  ix86_expand_vector_init_concat (mode, target, second, n);
44593 	}
44594       else
44595 	ix86_expand_vector_init_concat (mode, target, first, n);
44596       break;
44597 
44598     default:
44599       gcc_unreachable ();
44600     }
44601 }
44602 
44603 /* A subroutine of ix86_expand_vector_init_general.  Use vector
44604    interleave to handle the most general case: all values variable,
44605    and none identical.  */
44606 
44607 static void
44608 ix86_expand_vector_init_interleave (machine_mode mode,
44609 				    rtx target, rtx *ops, int n)
44610 {
44611   machine_mode first_imode, second_imode, third_imode, inner_mode;
44612   int i, j;
44613   rtx op0, op1;
44614   rtx (*gen_load_even) (rtx, rtx, rtx);
44615   rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44616   rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44617 
44618   switch (mode)
44619     {
44620     case V8HImode:
44621       gen_load_even = gen_vec_setv8hi;
44622       gen_interleave_first_low = gen_vec_interleave_lowv4si;
44623       gen_interleave_second_low = gen_vec_interleave_lowv2di;
44624       inner_mode = HImode;
44625       first_imode = V4SImode;
44626       second_imode = V2DImode;
44627       third_imode = VOIDmode;
44628       break;
44629     case V16QImode:
44630       gen_load_even = gen_vec_setv16qi;
44631       gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44632       gen_interleave_second_low = gen_vec_interleave_lowv4si;
44633       inner_mode = QImode;
44634       first_imode = V8HImode;
44635       second_imode = V4SImode;
44636       third_imode = V2DImode;
44637       break;
44638     default:
44639       gcc_unreachable ();
44640     }
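  /* Roughly, for the V8HImode case called with n == 4: each iteration of
     the loop below packs the pair (ops[2*i], ops[2*i+1]) into the low 32
     bits of a fresh vector, and the two interleave ladders that follow
     merge the four pair-vectors with punpckldq and then punpcklqdq.  */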
44641 
44642   for (i = 0; i < n; i++)
44643     {
44644       /* Extend the odd element to SImode using a paradoxical SUBREG.  */
44645       op0 = gen_reg_rtx (SImode);
44646       emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44647 
44648       /* Insert the SImode value as the low element of a V4SImode vector.  */
44649       op1 = gen_reg_rtx (V4SImode);
44650       op0 = gen_rtx_VEC_MERGE (V4SImode,
44651 			       gen_rtx_VEC_DUPLICATE (V4SImode,
44652 						      op0),
44653 			       CONST0_RTX (V4SImode),
44654 			       const1_rtx);
44655       emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
44656 
44657       /* Cast the V4SImode vector back to a vector in the original mode.  */
44658       op0 = gen_reg_rtx (mode);
44659       emit_move_insn (op0, gen_lowpart (mode, op1));
44660 
44661       /* Load even elements into the second position.  */
44662       emit_insn (gen_load_even (op0,
44663 				force_reg (inner_mode,
44664 					   ops [i + i + 1]),
44665 				const1_rtx));
44666 
44667       /* Cast vector to FIRST_IMODE vector.  */
44668       ops[i] = gen_reg_rtx (first_imode);
44669       emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44670     }
44671 
44672   /* Interleave low FIRST_IMODE vectors.  */
44673   for (i = j = 0; i < n; i += 2, j++)
44674     {
44675       op0 = gen_reg_rtx (first_imode);
44676       emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44677 
44678       /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
44679       ops[j] = gen_reg_rtx (second_imode);
44680       emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44681     }
44682 
44683   /* Interleave low SECOND_IMODE vectors.  */
44684   switch (second_imode)
44685     {
44686     case V4SImode:
44687       for (i = j = 0; i < n / 2; i += 2, j++)
44688 	{
44689 	  op0 = gen_reg_rtx (second_imode);
44690 	  emit_insn (gen_interleave_second_low (op0, ops[i],
44691 						ops[i + 1]));
44692 
44693 	  /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44694 	     vector.  */
44695 	  ops[j] = gen_reg_rtx (third_imode);
44696 	  emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44697 	}
44698       second_imode = V2DImode;
44699       gen_interleave_second_low = gen_vec_interleave_lowv2di;
44700       /* FALLTHRU */
44701 
44702     case V2DImode:
44703       op0 = gen_reg_rtx (second_imode);
44704       emit_insn (gen_interleave_second_low (op0, ops[0],
44705 					    ops[1]));
44706 
44707       /* Cast the SECOND_IMODE vector back to a vector in the original
44708 	 mode.  */
44709       emit_insn (gen_rtx_SET (VOIDmode, target,
44710 			      gen_lowpart (mode, op0)));
44711       break;
44712 
44713     default:
44714       gcc_unreachable ();
44715     }
44716 }
44717 
44718 /* A subroutine of ix86_expand_vector_init.  Handle the most general case:
44719    all values variable, and none identical.  */
44720 
44721 static void
44722 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44723 				 rtx target, rtx vals)
44724 {
44725   rtx ops[64], op0, op1, op2, op3, op4, op5;
44726   machine_mode half_mode = VOIDmode;
44727   machine_mode quarter_mode = VOIDmode;
44728   int n, i;
44729 
44730   switch (mode)
44731     {
44732     case V2SFmode:
44733     case V2SImode:
44734       if (!mmx_ok && !TARGET_SSE)
44735 	break;
44736       /* FALLTHRU */
44737 
44738     case V16SImode:
44739     case V16SFmode:
44740     case V8DFmode:
44741     case V8DImode:
44742     case V8SFmode:
44743     case V8SImode:
44744     case V4DFmode:
44745     case V4DImode:
44746     case V4SFmode:
44747     case V4SImode:
44748     case V2DFmode:
44749     case V2DImode:
44750       n = GET_MODE_NUNITS (mode);
44751       for (i = 0; i < n; i++)
44752 	ops[i] = XVECEXP (vals, 0, i);
44753       ix86_expand_vector_init_concat (mode, target, ops, n);
44754       return;
44755 
44756     case V32QImode:
44757       half_mode = V16QImode;
44758       goto half;
44759 
44760     case V16HImode:
44761       half_mode = V8HImode;
44762       goto half;
44763 
44764 half:
44765       n = GET_MODE_NUNITS (mode);
44766       for (i = 0; i < n; i++)
44767 	ops[i] = XVECEXP (vals, 0, i);
44768       op0 = gen_reg_rtx (half_mode);
44769       op1 = gen_reg_rtx (half_mode);
44770       ix86_expand_vector_init_interleave (half_mode, op0, ops,
44771 					  n >> 2);
44772       ix86_expand_vector_init_interleave (half_mode, op1,
44773 					  &ops [n >> 1], n >> 2);
44774       emit_insn (gen_rtx_SET (VOIDmode, target,
44775 			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
44776       return;
44777 
44778     case V64QImode:
44779       quarter_mode = V16QImode;
44780       half_mode = V32QImode;
44781       goto quarter;
44782 
44783     case V32HImode:
44784       quarter_mode = V8HImode;
44785       half_mode = V16HImode;
44786       goto quarter;
44787 
44788 quarter:
44789       n = GET_MODE_NUNITS (mode);
44790       for (i = 0; i < n; i++)
44791 	ops[i] = XVECEXP (vals, 0, i);
44792       op0 = gen_reg_rtx (quarter_mode);
44793       op1 = gen_reg_rtx (quarter_mode);
44794       op2 = gen_reg_rtx (quarter_mode);
44795       op3 = gen_reg_rtx (quarter_mode);
44796       op4 = gen_reg_rtx (half_mode);
44797       op5 = gen_reg_rtx (half_mode);
44798       ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44799 					  n >> 3);
44800       ix86_expand_vector_init_interleave (quarter_mode, op1,
44801 					  &ops [n >> 2], n >> 3);
44802       ix86_expand_vector_init_interleave (quarter_mode, op2,
44803 					  &ops [n >> 1], n >> 3);
44804       ix86_expand_vector_init_interleave (quarter_mode, op3,
44805 					  &ops [(n >> 1) | (n >> 2)], n >> 3);
44806       emit_insn (gen_rtx_SET (VOIDmode, op4,
44807 			      gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44808       emit_insn (gen_rtx_SET (VOIDmode, op5,
44809 			      gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44810       emit_insn (gen_rtx_SET (VOIDmode, target,
44811 			      gen_rtx_VEC_CONCAT (mode, op4, op5)));
44812       return;
44813 
44814     case V16QImode:
44815       if (!TARGET_SSE4_1)
44816 	break;
44817       /* FALLTHRU */
44818 
44819     case V8HImode:
44820       if (!TARGET_SSE2)
44821 	break;
44822 
44823       /* Don't use ix86_expand_vector_init_interleave if we can't
44824 	 move from GPR to SSE register directly.  */
44825       if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44826 	break;
44827 
44828       n = GET_MODE_NUNITS (mode);
44829       for (i = 0; i < n; i++)
44830 	ops[i] = XVECEXP (vals, 0, i);
44831       ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44832       return;
44833 
44834     case V4HImode:
44835     case V8QImode:
44836       break;
44837 
44838     default:
44839       gcc_unreachable ();
44840     }
44841 
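  /* Fallback: assemble the vector from word_mode integer pieces.  Within
     each word the loop visits elements from the highest index down, so
     e.g. a V4HImode build on a 32-bit target packs word 0 as
     (elt1 << 16) | elt0 and word 1 as (elt3 << 16) | elt2.  */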
44842     {
44843       int i, j, n_elts, n_words, n_elt_per_word;
44844       machine_mode inner_mode;
44845       rtx words[4], shift;
44846 
44847       inner_mode = GET_MODE_INNER (mode);
44848       n_elts = GET_MODE_NUNITS (mode);
44849       n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44850       n_elt_per_word = n_elts / n_words;
44851       shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
44852 
44853       for (i = 0; i < n_words; ++i)
44854 	{
44855 	  rtx word = NULL_RTX;
44856 
44857 	  for (j = 0; j < n_elt_per_word; ++j)
44858 	    {
44859 	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44860 	      elt = convert_modes (word_mode, inner_mode, elt, true);
44861 
44862 	      if (j == 0)
44863 		word = elt;
44864 	      else
44865 		{
44866 		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44867 					      word, 1, OPTAB_LIB_WIDEN);
44868 		  word = expand_simple_binop (word_mode, IOR, word, elt,
44869 					      word, 1, OPTAB_LIB_WIDEN);
44870 		}
44871 	    }
44872 
44873 	  words[i] = word;
44874 	}
44875 
44876       if (n_words == 1)
44877 	emit_move_insn (target, gen_lowpart (mode, words[0]));
44878       else if (n_words == 2)
44879 	{
44880 	  rtx tmp = gen_reg_rtx (mode);
44881 	  emit_clobber (tmp);
44882 	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44883 	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44884 	  emit_move_insn (target, tmp);
44885 	}
44886       else if (n_words == 4)
44887 	{
44888 	  rtx tmp = gen_reg_rtx (V4SImode);
44889 	  gcc_assert (word_mode == SImode);
44890 	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44891 	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44892 	  emit_move_insn (target, gen_lowpart (mode, tmp));
44893 	}
44894       else
44895 	gcc_unreachable ();
44896     }
44897 }
44898 
44899 /* Initialize vector TARGET via VALS.  Suppress the use of MMX
44900    instructions unless MMX_OK is true.  */
44901 
44902 void
44903 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44904 {
44905   machine_mode mode = GET_MODE (target);
44906   machine_mode inner_mode = GET_MODE_INNER (mode);
44907   int n_elts = GET_MODE_NUNITS (mode);
44908   int n_var = 0, one_var = -1;
44909   bool all_same = true, all_const_zero = true;
44910   int i;
44911   rtx x;
44912 
44913   for (i = 0; i < n_elts; ++i)
44914     {
44915       x = XVECEXP (vals, 0, i);
44916       if (!(CONST_INT_P (x)
44917 	    || GET_CODE (x) == CONST_DOUBLE
44918 	    || GET_CODE (x) == CONST_FIXED))
44919 	n_var++, one_var = i;
44920       else if (x != CONST0_RTX (inner_mode))
44921 	all_const_zero = false;
44922       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44923 	all_same = false;
44924     }
44925 
44926   /* Constants are best loaded from the constant pool.  */
44927   if (n_var == 0)
44928     {
44929       emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44930       return;
44931     }
44932 
44933   /* If all values are identical, broadcast the value.  */
44934   if (all_same
44935       && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44936 					    XVECEXP (vals, 0, 0)))
44937     return;
44938 
44939   /* Values where only one field is non-constant are best loaded from
44940      the pool and overwritten via move later.  */
44941   if (n_var == 1)
44942     {
44943       if (all_const_zero
44944 	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44945 						  XVECEXP (vals, 0, one_var),
44946 						  one_var))
44947 	return;
44948 
44949       if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
44950 	return;
44951     }
44952 
44953   ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
44954 }
44955 
44956 void
44957 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44958 {
44959   machine_mode mode = GET_MODE (target);
44960   machine_mode inner_mode = GET_MODE_INNER (mode);
44961   machine_mode half_mode;
44962   bool use_vec_merge = false;
44963   rtx tmp;
44964   static rtx (*gen_extract[6][2]) (rtx, rtx)
44965     = {
44966 	{ gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44967 	{ gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44968 	{ gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44969 	{ gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44970 	{ gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44971 	{ gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44972       };
44973   static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44974     = {
44975 	{ gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44976 	{ gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44977 	{ gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44978 	{ gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44979 	{ gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44980 	{ gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
44981       };
44982   int i, j, n;
44983   machine_mode mmode = VOIDmode;
44984   rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
44985 
44986   switch (mode)
44987     {
44988     case V2SFmode:
44989     case V2SImode:
44990       if (mmx_ok)
44991 	{
44992 	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44993 	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44994 	  if (elt == 0)
44995 	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44996 	  else
44997 	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44998 	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44999 	  return;
45000 	}
45001       break;
45002 
45003     case V2DImode:
45004       use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
45005       if (use_vec_merge)
45006 	break;
45007 
45008       tmp = gen_reg_rtx (GET_MODE_INNER (mode));
45009       ix86_expand_vector_extract (false, tmp, target, 1 - elt);
45010       if (elt == 0)
45011 	tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
45012       else
45013 	tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
45014       emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45015       return;
45016 
45017     case V2DFmode:
45018       {
45019 	rtx op0, op1;
45020 
45021 	/* For the two element vectors, we implement a VEC_CONCAT with
45022 	   the extraction of the other element.  */
45023 
45024 	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
45025 	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
45026 
45027 	if (elt == 0)
45028 	  op0 = val, op1 = tmp;
45029 	else
45030 	  op0 = tmp, op1 = val;
45031 
45032 	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
45033 	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45034       }
45035       return;
45036 
45037     case V4SFmode:
45038       use_vec_merge = TARGET_SSE4_1;
45039       if (use_vec_merge)
45040 	break;
45041 
45042       switch (elt)
45043 	{
45044 	case 0:
45045 	  use_vec_merge = true;
45046 	  break;
45047 
45048 	case 1:
45049 	  /* tmp = target = A B C D */
45050 	  tmp = copy_to_reg (target);
45051 	  /* target = A A B B */
45052 	  emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
45053 	  /* target = X A B B */
45054 	  ix86_expand_vector_set (false, target, val, 0);
45055 	  /* target = A X C D  */
45056 	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
45057 					  const1_rtx, const0_rtx,
45058 					  GEN_INT (2+4), GEN_INT (3+4)));
45059 	  return;
45060 
45061 	case 2:
45062 	  /* tmp = target = A B C D */
45063 	  tmp = copy_to_reg (target);
45064 	  /* tmp = X B C D */
45065 	  ix86_expand_vector_set (false, tmp, val, 0);
45066 	  /* target = A B X D */
45067 	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
45068 					  const0_rtx, const1_rtx,
45069 					  GEN_INT (0+4), GEN_INT (3+4)));
45070 	  return;
45071 
45072 	case 3:
45073 	  /* tmp = target = A B C D */
45074 	  tmp = copy_to_reg (target);
45075 	  /* tmp = X B C D */
45076 	  ix86_expand_vector_set (false, tmp, val, 0);
45077 	  /* target = A B C X */
45078 	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
45079 					  const0_rtx, const1_rtx,
45080 					  GEN_INT (2+4), GEN_INT (0+4)));
45081 	  return;
45082 
45083 	default:
45084 	  gcc_unreachable ();
45085 	}
45086       break;
45087 
45088     case V4SImode:
45089       use_vec_merge = TARGET_SSE4_1;
45090       if (use_vec_merge)
45091 	break;
45092 
45093       /* Element 0 handled by vec_merge below.  */
45094       if (elt == 0)
45095 	{
45096 	  use_vec_merge = true;
45097 	  break;
45098 	}
45099 
45100       if (TARGET_SSE2)
45101 	{
45102 	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
45103 	     store into element 0, then shuffle them back.  */
45104 
45105 	  rtx order[4];
45106 
45107 	  order[0] = GEN_INT (elt);
45108 	  order[1] = const1_rtx;
45109 	  order[2] = const2_rtx;
45110 	  order[3] = GEN_INT (3);
45111 	  order[elt] = const0_rtx;
45112 
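	  /* E.g., for elt == 2 this yields order == {2, 1, 0, 3}.  The
	     resulting pshufd is an involution, so emitting it again below
	     restores the original order once the new value has been
	     stored into element 0.  */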
45113 	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
45114 					order[1], order[2], order[3]));
45115 
45116 	  ix86_expand_vector_set (false, target, val, 0);
45117 
45118 	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
45119 					order[1], order[2], order[3]));
45120 	}
45121       else
45122 	{
45123 	  /* For SSE1, we have to reuse the V4SF code.  */
45124 	  rtx t = gen_reg_rtx (V4SFmode);
45125 	  emit_move_insn (t, gen_lowpart (V4SFmode, target));
45126 	  ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
45127 	  emit_move_insn (target, gen_lowpart (mode, t));
45128 	}
45129       return;
45130 
45131     case V8HImode:
45132       use_vec_merge = TARGET_SSE2;
45133       break;
45134     case V4HImode:
45135       use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45136       break;
45137 
45138     case V16QImode:
45139       use_vec_merge = TARGET_SSE4_1;
45140       break;
45141 
45142     case V8QImode:
45143       break;
45144 
45145     case V32QImode:
45146       half_mode = V16QImode;
45147       j = 0;
45148       n = 16;
45149       goto half;
45150 
45151     case V16HImode:
45152       half_mode = V8HImode;
45153       j = 1;
45154       n = 8;
45155       goto half;
45156 
45157     case V8SImode:
45158       half_mode = V4SImode;
45159       j = 2;
45160       n = 4;
45161       goto half;
45162 
45163     case V4DImode:
45164       half_mode = V2DImode;
45165       j = 3;
45166       n = 2;
45167       goto half;
45168 
45169     case V8SFmode:
45170       half_mode = V4SFmode;
45171       j = 4;
45172       n = 4;
45173       goto half;
45174 
45175     case V4DFmode:
45176       half_mode = V2DFmode;
45177       j = 5;
45178       n = 2;
45179       goto half;
45180 
45181 half:
45182       /* Compute offset.  */
45183       i = elt / n;
45184       elt %= n;
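      /* E.g., a request for element 6 of a V8SImode vector (n == 4)
	 selects the high half (i == 1) and lane 2 within it.  */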
45185 
45186       gcc_assert (i <= 1);
45187 
45188       /* Extract the half.  */
45189       tmp = gen_reg_rtx (half_mode);
45190       emit_insn (gen_extract[j][i] (tmp, target));
45191 
45192       /* Put val in tmp at elt.  */
45193       ix86_expand_vector_set (false, tmp, val, elt);
45194 
45195       /* Put it back.  */
45196       emit_insn (gen_insert[j][i] (target, target, tmp));
45197       return;
45198 
45199     case V8DFmode:
45200       if (TARGET_AVX512F)
45201 	{
45202 	  mmode = QImode;
45203 	  gen_blendm = gen_avx512f_blendmv8df;
45204 	}
45205       break;
45206 
45207     case V8DImode:
45208       if (TARGET_AVX512F)
45209 	{
45210 	  mmode = QImode;
45211 	  gen_blendm = gen_avx512f_blendmv8di;
45212 	}
45213       break;
45214 
45215     case V16SFmode:
45216       if (TARGET_AVX512F)
45217 	{
45218 	  mmode = HImode;
45219 	  gen_blendm = gen_avx512f_blendmv16sf;
45220 	}
45221       break;
45222 
45223     case V16SImode:
45224       if (TARGET_AVX512F)
45225 	{
45226 	  mmode = HImode;
45227 	  gen_blendm = gen_avx512f_blendmv16si;
45228 	}
45229       break;
45230 
45231     case V32HImode:
45232       if (TARGET_AVX512F && TARGET_AVX512BW)
45233 	{
45234 	  mmode = SImode;
45235 	  gen_blendm = gen_avx512bw_blendmv32hi;
45236 	}
45237       break;
45238 
45239     case V64QImode:
45240       if (TARGET_AVX512F && TARGET_AVX512BW)
45241 	{
45242 	  mmode = DImode;
45243 	  gen_blendm = gen_avx512bw_blendmv64qi;
45244 	}
45245       break;
45246 
45247     default:
45248       break;
45249     }
45250 
45251   if (mmode != VOIDmode)
45252     {
45253       tmp = gen_reg_rtx (mode);
45254       emit_insn (gen_rtx_SET (VOIDmode, tmp,
45255 			      gen_rtx_VEC_DUPLICATE (mode, val)));
45256       /* The avx512*_blendm<mode> expanders have different operand order
45257 	 from VEC_MERGE.  In VEC_MERGE, the first input operand is used for
45258 	 elements where the mask is set and second input operand otherwise,
45259 	 in {sse,avx}*_*blend* the first input operand is used for elements
45260 	 where the mask is clear and second input operand otherwise.  */
45261       emit_insn (gen_blendm (target, target, tmp,
45262 			     force_reg (mmode,
45263 					gen_int_mode (1 << elt, mmode))));
45264     }
45265   else if (use_vec_merge)
45266     {
45267       tmp = gen_rtx_VEC_DUPLICATE (mode, val);
45268       tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
45269       emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45270     }
45271   else
45272     {
45273       rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45274 
45275       emit_move_insn (mem, target);
45276 
45277       tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45278       emit_move_insn (tmp, val);
45279 
45280       emit_move_insn (target, mem);
45281     }
45282 }
45283 
45284 void
45285 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
45286 {
45287   machine_mode mode = GET_MODE (vec);
45288   machine_mode inner_mode = GET_MODE_INNER (mode);
45289   bool use_vec_extr = false;
45290   rtx tmp;
45291 
45292   switch (mode)
45293     {
45294     case V2SImode:
45295     case V2SFmode:
45296       if (!mmx_ok)
45297 	break;
45298       /* FALLTHRU */
45299 
45300     case V2DFmode:
45301     case V2DImode:
45302       use_vec_extr = true;
45303       break;
45304 
45305     case V4SFmode:
45306       use_vec_extr = TARGET_SSE4_1;
45307       if (use_vec_extr)
45308 	break;
45309 
45310       switch (elt)
45311 	{
45312 	case 0:
45313 	  tmp = vec;
45314 	  break;
45315 
45316 	case 1:
45317 	case 3:
45318 	  tmp = gen_reg_rtx (mode);
45319 	  emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
45320 				       GEN_INT (elt), GEN_INT (elt),
45321 				       GEN_INT (elt+4), GEN_INT (elt+4)));
45322 	  break;
45323 
45324 	case 2:
45325 	  tmp = gen_reg_rtx (mode);
45326 	  emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
45327 	  break;
45328 
45329 	default:
45330 	  gcc_unreachable ();
45331 	}
45332       vec = tmp;
45333       use_vec_extr = true;
45334       elt = 0;
45335       break;
45336 
45337     case V4SImode:
45338       use_vec_extr = TARGET_SSE4_1;
45339       if (use_vec_extr)
45340 	break;
45341 
45342       if (TARGET_SSE2)
45343 	{
45344 	  switch (elt)
45345 	    {
45346 	    case 0:
45347 	      tmp = vec;
45348 	      break;
45349 
45350 	    case 1:
45351 	    case 3:
45352 	      tmp = gen_reg_rtx (mode);
45353 	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
45354 					    GEN_INT (elt), GEN_INT (elt),
45355 					    GEN_INT (elt), GEN_INT (elt)));
45356 	      break;
45357 
45358 	    case 2:
45359 	      tmp = gen_reg_rtx (mode);
45360 	      emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
45361 	      break;
45362 
45363 	    default:
45364 	      gcc_unreachable ();
45365 	    }
45366 	  vec = tmp;
45367 	  use_vec_extr = true;
45368 	  elt = 0;
45369 	}
45370       else
45371 	{
45372 	  /* For SSE1, we have to reuse the V4SF code.  */
45373 	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
45374 				      gen_lowpart (V4SFmode, vec), elt);
45375 	  return;
45376 	}
45377       break;
45378 
45379     case V8HImode:
45380       use_vec_extr = TARGET_SSE2;
45381       break;
45382     case V4HImode:
45383       use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45384       break;
45385 
45386     case V16QImode:
45387       use_vec_extr = TARGET_SSE4_1;
45388       break;
45389 
45390     case V8SFmode:
45391       if (TARGET_AVX)
45392 	{
45393 	  tmp = gen_reg_rtx (V4SFmode);
45394 	  if (elt < 4)
45395 	    emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
45396 	  else
45397 	    emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
45398 	  ix86_expand_vector_extract (false, target, tmp, elt & 3);
45399 	  return;
45400 	}
45401       break;
45402 
45403     case V4DFmode:
45404       if (TARGET_AVX)
45405 	{
45406 	  tmp = gen_reg_rtx (V2DFmode);
45407 	  if (elt < 2)
45408 	    emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
45409 	  else
45410 	    emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
45411 	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
45412 	  return;
45413 	}
45414       break;
45415 
45416     case V32QImode:
45417       if (TARGET_AVX)
45418 	{
45419 	  tmp = gen_reg_rtx (V16QImode);
45420 	  if (elt < 16)
45421 	    emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
45422 	  else
45423 	    emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
45424 	  ix86_expand_vector_extract (false, target, tmp, elt & 15);
45425 	  return;
45426 	}
45427       break;
45428 
45429     case V16HImode:
45430       if (TARGET_AVX)
45431 	{
45432 	  tmp = gen_reg_rtx (V8HImode);
45433 	  if (elt < 8)
45434 	    emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
45435 	  else
45436 	    emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
45437 	  ix86_expand_vector_extract (false, target, tmp, elt & 7);
45438 	  return;
45439 	}
45440       break;
45441 
45442     case V8SImode:
45443       if (TARGET_AVX)
45444 	{
45445 	  tmp = gen_reg_rtx (V4SImode);
45446 	  if (elt < 4)
45447 	    emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
45448 	  else
45449 	    emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
45450 	  ix86_expand_vector_extract (false, target, tmp, elt & 3);
45451 	  return;
45452 	}
45453       break;
45454 
45455     case V4DImode:
45456       if (TARGET_AVX)
45457 	{
45458 	  tmp = gen_reg_rtx (V2DImode);
45459 	  if (elt < 2)
45460 	    emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45461 	  else
45462 	    emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45463 	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
45464 	  return;
45465 	}
45466       break;
45467 
45468     case V32HImode:
45469       if (TARGET_AVX512BW)
45470 	{
45471 	  tmp = gen_reg_rtx (V16HImode);
45472 	  if (elt < 16)
45473 	    emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45474 	  else
45475 	    emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45476 	  ix86_expand_vector_extract (false, target, tmp, elt & 15);
45477 	  return;
45478 	}
45479       break;
45480 
45481     case V64QImode:
45482       if (TARGET_AVX512BW)
45483 	{
45484 	  tmp = gen_reg_rtx (V32QImode);
45485 	  if (elt < 32)
45486 	    emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45487 	  else
45488 	    emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45489 	  ix86_expand_vector_extract (false, target, tmp, elt & 31);
45490 	  return;
45491 	}
45492       break;
45493 
45494     case V16SFmode:
45495       tmp = gen_reg_rtx (V8SFmode);
45496       if (elt < 8)
45497 	emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45498       else
45499 	emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45500       ix86_expand_vector_extract (false, target, tmp, elt & 7);
45501       return;
45502 
45503     case V8DFmode:
45504       tmp = gen_reg_rtx (V4DFmode);
45505       if (elt < 4)
45506 	emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45507       else
45508 	emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45509       ix86_expand_vector_extract (false, target, tmp, elt & 3);
45510       return;
45511 
45512     case V16SImode:
45513       tmp = gen_reg_rtx (V8SImode);
45514       if (elt < 8)
45515 	emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45516       else
45517 	emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45518       ix86_expand_vector_extract (false, target, tmp, elt & 7);
45519       return;
45520 
45521     case V8DImode:
45522       tmp = gen_reg_rtx (V4DImode);
45523       if (elt < 4)
45524 	emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45525       else
45526 	emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45527       ix86_expand_vector_extract (false, target, tmp, elt & 3);
45528       return;
45529 
45530     case V8QImode:
45531       /* ??? Could extract the appropriate HImode element and shift.  */
45532     default:
45533       break;
45534     }
45535 
45536   if (use_vec_extr)
45537     {
45538       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45539       tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45540 
45541       /* Let the rtl optimizers know about the zero extension performed.  */
45542       if (inner_mode == QImode || inner_mode == HImode)
45543 	{
45544 	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45545 	  target = gen_lowpart (SImode, target);
45546 	}
45547 
45548       emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45549     }
45550   else
45551     {
45552       rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45553 
45554       emit_move_insn (mem, vec);
45555 
45556       tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45557       emit_move_insn (target, tmp);
45558     }
45559 }
45560 
45561 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45562    to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45563    The upper bits of DEST are undefined, though they shouldn't cause
45564    exceptions (some bits from src or all zeros are ok).  */
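/* For example, for V4SImode and i == 128 this emits a 64-bit logical
   right shift of the vector viewed as V1TImode, moving elements 2 and 3
   down into elements 0 and 1.  */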
45565 
45566 static void
45567 emit_reduc_half (rtx dest, rtx src, int i)
45568 {
45569   rtx tem, d = dest;
45570   switch (GET_MODE (src))
45571     {
45572     case V4SFmode:
45573       if (i == 128)
45574 	tem = gen_sse_movhlps (dest, src, src);
45575       else
45576 	tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45577 				   GEN_INT (1 + 4), GEN_INT (1 + 4));
45578       break;
45579     case V2DFmode:
45580       tem = gen_vec_interleave_highv2df (dest, src, src);
45581       break;
45582     case V16QImode:
45583     case V8HImode:
45584     case V4SImode:
45585     case V2DImode:
45586       d = gen_reg_rtx (V1TImode);
45587       tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
45588 				GEN_INT (i / 2));
45589       break;
45590     case V8SFmode:
45591       if (i == 256)
45592 	tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45593       else
45594 	tem = gen_avx_shufps256 (dest, src, src,
45595 				 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45596       break;
45597     case V4DFmode:
45598       if (i == 256)
45599 	tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45600       else
45601 	tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
45602       break;
45603     case V32QImode:
45604     case V16HImode:
45605     case V8SImode:
45606     case V4DImode:
45607       if (i == 256)
45608 	{
45609 	  if (GET_MODE (dest) != V4DImode)
45610 	    d = gen_reg_rtx (V4DImode);
45611 	  tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45612 				   gen_lowpart (V4DImode, src),
45613 				   const1_rtx);
45614 	}
45615       else
45616 	{
45617 	  d = gen_reg_rtx (V2TImode);
45618 	  tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
45619 				    GEN_INT (i / 2));
45620 	}
45621       break;
45622     case V64QImode:
45623     case V32HImode:
45624     case V16SImode:
45625     case V16SFmode:
45626     case V8DImode:
45627     case V8DFmode:
45628       if (i > 128)
45629 	tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45630 				      gen_lowpart (V16SImode, src),
45631 				      gen_lowpart (V16SImode, src),
45632 				      GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45633 				      GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45634 				      GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45635 				      GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45636 				      GEN_INT (0xC), GEN_INT (0xD),
45637 				      GEN_INT (0xE), GEN_INT (0xF),
45638 				      GEN_INT (0x10), GEN_INT (0x11),
45639 				      GEN_INT (0x12), GEN_INT (0x13),
45640 				      GEN_INT (0x14), GEN_INT (0x15),
45641 				      GEN_INT (0x16), GEN_INT (0x17));
45642       else
45643 	tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45644 				   gen_lowpart (V16SImode, src),
45645 				   GEN_INT (i == 128 ? 0x2 : 0x1),
45646 				   GEN_INT (0x3),
45647 				   GEN_INT (0x3),
45648 				   GEN_INT (0x3),
45649 				   GEN_INT (i == 128 ? 0x6 : 0x5),
45650 				   GEN_INT (0x7),
45651 				   GEN_INT (0x7),
45652 				   GEN_INT (0x7),
45653 				   GEN_INT (i == 128 ? 0xA : 0x9),
45654 				   GEN_INT (0xB),
45655 				   GEN_INT (0xB),
45656 				   GEN_INT (0xB),
45657 				   GEN_INT (i == 128 ? 0xE : 0xD),
45658 				   GEN_INT (0xF),
45659 				   GEN_INT (0xF),
45660 				   GEN_INT (0xF));
45661       break;
45662     default:
45663       gcc_unreachable ();
45664     }
45665   emit_insn (tem);
45666   if (d != dest)
45667     emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45668 }
45669 
45670 /* Expand a vector reduction.  FN is the binary pattern to reduce;
45671    DEST is the destination; IN is the input vector.  */
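/* E.g., a V4SImode maximum reduction takes two rounds: the high 64 bits
   are shifted down and combined with FN, then the high 32 bits of that
   partial result; the scalar answer ends up in element 0 of DEST.  */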
45672 
45673 void
45674 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45675 {
45676   rtx half, dst, vec = in;
45677   machine_mode mode = GET_MODE (in);
45678   int i;
45679 
45680   /* SSE4 has a special instruction for V8HImode UMIN reduction.  */
45681   if (TARGET_SSE4_1
45682       && mode == V8HImode
45683       && fn == gen_uminv8hi3)
45684     {
45685       emit_insn (gen_sse4_1_phminposuw (dest, in));
45686       return;
45687     }
45688 
45689   for (i = GET_MODE_BITSIZE (mode);
45690        i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45691        i >>= 1)
45692     {
45693       half = gen_reg_rtx (mode);
45694       emit_reduc_half (half, vec, i);
45695       if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45696 	dst = dest;
45697       else
45698 	dst = gen_reg_rtx (mode);
45699       emit_insn (fn (dst, half, vec));
45700       vec = dst;
45701     }
45702 }
45703 
45704 /* Target hook for scalar_mode_supported_p.  */
45705 static bool
45706 ix86_scalar_mode_supported_p (machine_mode mode)
45707 {
45708   if (DECIMAL_FLOAT_MODE_P (mode))
45709     return default_decimal_float_supported_p ();
45710   else if (mode == TFmode)
45711     return true;
45712   else
45713     return default_scalar_mode_supported_p (mode);
45714 }
45715 
45716 /* Implements target hook vector_mode_supported_p.  */
45717 static bool
45718 ix86_vector_mode_supported_p (machine_mode mode)
45719 {
45720   if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45721     return true;
45722   if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45723     return true;
45724   if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45725     return true;
45726   if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45727     return true;
45728   if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45729     return true;
45730   if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45731     return true;
45732   return false;
45733 }
45734 
45735 /* Implement target hook libgcc_floating_mode_supported_p.  */
45736 static bool
45737 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45738 {
45739   switch (mode)
45740     {
45741     case SFmode:
45742     case DFmode:
45743     case XFmode:
45744       return true;
45745 
45746     case TFmode:
45747 #ifdef IX86_NO_LIBGCC_TFMODE
45748       return false;
45749 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45750       return TARGET_LONG_DOUBLE_128;
45751 #else
45752       return true;
45753 #endif
45754 
45755     default:
45756       return false;
45757     }
45758 }
45759 
45760 /* Target hook for c_mode_for_suffix.  */
45761 static machine_mode
45762 ix86_c_mode_for_suffix (char suffix)
45763 {
45764   if (suffix == 'q')
45765     return TFmode;
45766   if (suffix == 'w')
45767     return XFmode;
45768 
45769   return VOIDmode;
45770 }
45771 
45772 /* Worker function for TARGET_MD_ASM_CLOBBERS.
45773 
45774    We do this in the new i386 backend to maintain source compatibility
45775    with the old cc0-based compiler.  */
45776 
45777 static tree
45778 ix86_md_asm_clobbers (tree, tree, tree clobbers)
45779 {
45780   clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
45781 			clobbers);
45782   clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
45783 			clobbers);
45784   return clobbers;
45785 }
45786 
45787 /* Implements target vector targetm.asm.encode_section_info.  */
45788 
45789 static void ATTRIBUTE_UNUSED
45790 ix86_encode_section_info (tree decl, rtx rtl, int first)
45791 {
45792   default_encode_section_info (decl, rtl, first);
45793 
45794   if (ix86_in_large_data_p (decl))
45795     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45796 }
45797 
45798 /* Worker function for REVERSE_CONDITION.  */
45799 
45800 enum rtx_code
45801 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45802 {
45803   return (mode != CCFPmode && mode != CCFPUmode
45804 	  ? reverse_condition (code)
45805 	  : reverse_condition_maybe_unordered (code));
45806 }
45807 
45808 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45809    to OPERANDS[0].  */
45810 
45811 const char *
45812 output_387_reg_move (rtx insn, rtx *operands)
45813 {
45814   if (REG_P (operands[0]))
45815     {
45816       if (REG_P (operands[1])
45817 	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45818 	{
45819 	  if (REGNO (operands[0]) == FIRST_STACK_REG)
45820 	    return output_387_ffreep (operands, 0);
45821 	  return "fstp\t%y0";
45822 	}
45823       if (STACK_TOP_P (operands[0]))
45824 	return "fld%Z1\t%y1";
45825       return "fst\t%y0";
45826     }
45827   else if (MEM_P (operands[0]))
45828     {
45829       gcc_assert (REG_P (operands[1]));
45830       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45831 	return "fstp%Z0\t%y0";
45832       else
45833 	{
45834 	  /* There is no non-popping store to memory for XFmode.
45835 	     So if we need one, follow the store with a load.  */
45836 	  if (GET_MODE (operands[0]) == XFmode)
45837 	    return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45838 	  else
45839 	    return "fst%Z0\t%y0";
45840 	}
45841     }
45842   else
45843     gcc_unreachable ();
45844 }
45845 
45846 /* Output code to perform a conditional jump to LABEL, if C2 flag in
45847    FP status register is set.  */
45848 
45849 void
45850 ix86_emit_fp_unordered_jump (rtx label)
45851 {
45852   rtx reg = gen_reg_rtx (HImode);
45853   rtx temp;
45854 
45855   emit_insn (gen_x86_fnstsw_1 (reg));
45856 
45857   if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45858     {
45859       emit_insn (gen_x86_sahf_1 (reg));
45860 
45861       temp = gen_rtx_REG (CCmode, FLAGS_REG);
45862       temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
45863     }
45864   else
45865     {
45866       emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45867 
45868       temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45869       temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45870     }
45871 
45872   temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45873 			      gen_rtx_LABEL_REF (VOIDmode, label),
45874 			      pc_rtx);
45875   temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
45876 
45877   emit_jump_insn (temp);
45878   predict_jump (REG_BR_PROB_BASE * 10 / 100);
45879 }
45880 
45881 /* Output code to perform a log1p XFmode calculation.  */
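/* The i387 fyl2xp1 instruction is only specified for arguments with
   |x| < 1 - sqrt(2)/2; the 0.29289... constant below is that bound, and
   larger inputs take the fyl2x path on 1 + x instead.  */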
45882 
45883 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45884 {
45885   rtx_code_label *label1 = gen_label_rtx ();
45886   rtx_code_label *label2 = gen_label_rtx ();
45887 
45888   rtx tmp = gen_reg_rtx (XFmode);
45889   rtx tmp2 = gen_reg_rtx (XFmode);
45890   rtx test;
45891 
45892   emit_insn (gen_absxf2 (tmp, op1));
45893   test = gen_rtx_GE (VOIDmode, tmp,
45894     CONST_DOUBLE_FROM_REAL_VALUE (
45895        REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45896        XFmode));
45897   emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
45898 
45899   emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45900   emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45901   emit_jump (label2);
45902 
45903   emit_label (label1);
45904   emit_move_insn (tmp, CONST1_RTX (XFmode));
45905   emit_insn (gen_addxf3 (tmp, op1, tmp));
45906   emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45907   emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45908 
45909   emit_label (label2);
45910 }
45911 
45912 /* Emit code to compute OP0 = round (OP1), rounding halfway cases away from zero.  */
45913 void ix86_emit_i387_round (rtx op0, rtx op1)
45914 {
45915   machine_mode inmode = GET_MODE (op1);
45916   machine_mode outmode = GET_MODE (op0);
45917   rtx e1, e2, res, tmp, tmp1, half;
45918   rtx scratch = gen_reg_rtx (HImode);
45919   rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45920   rtx_code_label *jump_label = gen_label_rtx ();
45921   rtx insn;
45922   rtx (*gen_abs) (rtx, rtx);
45923   rtx (*gen_neg) (rtx, rtx);
45924 
45925   switch (inmode)
45926     {
45927     case SFmode:
45928       gen_abs = gen_abssf2;
45929       break;
45930     case DFmode:
45931       gen_abs = gen_absdf2;
45932       break;
45933     case XFmode:
45934       gen_abs = gen_absxf2;
45935       break;
45936     default:
45937       gcc_unreachable ();
45938     }
45939 
45940   switch (outmode)
45941     {
45942     case SFmode:
45943       gen_neg = gen_negsf2;
45944       break;
45945     case DFmode:
45946       gen_neg = gen_negdf2;
45947       break;
45948     case XFmode:
45949       gen_neg = gen_negxf2;
45950       break;
45951     case HImode:
45952       gen_neg = gen_neghi2;
45953       break;
45954     case SImode:
45955       gen_neg = gen_negsi2;
45956       break;
45957     case DImode:
45958       gen_neg = gen_negdi2;
45959       break;
45960     default:
45961       gcc_unreachable ();
45962     }
45963 
45964   e1 = gen_reg_rtx (inmode);
45965   e2 = gen_reg_rtx (inmode);
45966   res = gen_reg_rtx (outmode);
45967 
45968   half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45969 
45970   /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
45971 
45972   /* scratch = fxam(op1) */
45973   emit_insn (gen_rtx_SET (VOIDmode, scratch,
45974 			  gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45975 					  UNSPEC_FXAM)));
45976   /* e1 = fabs(op1) */
45977   emit_insn (gen_abs (e1, op1));
45978 
45979   /* e2 = e1 + 0.5 */
45980   half = force_reg (inmode, half);
45981   emit_insn (gen_rtx_SET (VOIDmode, e2,
45982 			  gen_rtx_PLUS (inmode, e1, half)));
45983 
45984   /* res = floor(e2) */
45985   if (inmode != XFmode)
45986     {
45987       tmp1 = gen_reg_rtx (XFmode);
45988 
45989       emit_insn (gen_rtx_SET (VOIDmode, tmp1,
45990 			      gen_rtx_FLOAT_EXTEND (XFmode, e2)));
45991     }
45992   else
45993     tmp1 = e2;
45994 
45995   switch (outmode)
45996     {
45997     case SFmode:
45998     case DFmode:
45999       {
46000 	rtx tmp0 = gen_reg_rtx (XFmode);
46001 
46002 	emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
46003 
46004 	emit_insn (gen_rtx_SET (VOIDmode, res,
46005 				gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
46006 						UNSPEC_TRUNC_NOOP)));
46007       }
46008       break;
46009     case XFmode:
46010       emit_insn (gen_frndintxf2_floor (res, tmp1));
46011       break;
46012     case HImode:
46013       emit_insn (gen_lfloorxfhi2 (res, tmp1));
46014       break;
46015     case SImode:
46016       emit_insn (gen_lfloorxfsi2 (res, tmp1));
46017       break;
46018     case DImode:
46019       emit_insn (gen_lfloorxfdi2 (res, tmp1));
46020       break;
46021     default:
46022       gcc_unreachable ();
46023     }
46024 
46025   /* flags = signbit(a) */
46026   emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
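  /* The test above examines the high byte of the saved FPU status word,
     so mask 0x02 selects condition bit C1, which fxam sets to the sign
     of the operand.  */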
46027 
46028   /* if (flags) then res = -res */
46029   tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
46030 			      gen_rtx_EQ (VOIDmode, flags, const0_rtx),
46031 			      gen_rtx_LABEL_REF (VOIDmode, jump_label),
46032 			      pc_rtx);
46033   insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
46034   predict_jump (REG_BR_PROB_BASE * 50 / 100);
46035   JUMP_LABEL (insn) = jump_label;
46036 
46037   emit_insn (gen_neg (res, res));
46038 
46039   emit_label (jump_label);
46040   LABEL_NUSES (jump_label) = 1;
46041 
46042   emit_move_insn (op0, res);
46043 }
46044 
46045 /* Output code to perform a Newton-Raphson approximation of a single precision
46046    floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */
46047 
46048 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
46049 {
46050   rtx x0, x1, e0, e1;
46051 
46052   x0 = gen_reg_rtx (mode);
46053   e0 = gen_reg_rtx (mode);
46054   e1 = gen_reg_rtx (mode);
46055   x1 = gen_reg_rtx (mode);
46056 
46057   /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
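  /* This is one Newton-Raphson step for f (x) = 1/x - b: given an
     estimate x0 of 1/b, x1 = x0 * (2 - b*x0) = 2*x0 - b*x0*x0, computed
     below as e1 - e0.  One step roughly doubles the accuracy of the
     initial ~12-bit rcpss/rcpps estimate.  */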
46058 
46059   b = force_reg (mode, b);
46060 
46061   /* x0 = rcp(b) estimate */
46062   if (mode == V16SFmode || mode == V8DFmode)
46063     emit_insn (gen_rtx_SET (VOIDmode, x0,
46064 			    gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
46065 					    UNSPEC_RCP14)));
46066   else
46067     emit_insn (gen_rtx_SET (VOIDmode, x0,
46068 			    gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
46069 					    UNSPEC_RCP)));
46070 
46071   /* e0 = x0 * b */
46072   emit_insn (gen_rtx_SET (VOIDmode, e0,
46073 			  gen_rtx_MULT (mode, x0, b)));
46074 
46075   /* e0 = x0 * e0 */
46076   emit_insn (gen_rtx_SET (VOIDmode, e0,
46077 			  gen_rtx_MULT (mode, x0, e0)));
46078 
46079   /* e1 = x0 + x0 */
46080   emit_insn (gen_rtx_SET (VOIDmode, e1,
46081 			  gen_rtx_PLUS (mode, x0, x0)));
46082 
46083   /* x1 = e1 - e0 */
46084   emit_insn (gen_rtx_SET (VOIDmode, x1,
46085 			  gen_rtx_MINUS (mode, e1, e0)));
46086 
46087   /* res = a * x1 */
46088   emit_insn (gen_rtx_SET (VOIDmode, res,
46089 			  gen_rtx_MULT (mode, a, x1)));
46090 }
46091 
46092 /* Output code to perform a Newton-Raphson approximation of a
46093    single precision floating point [reciprocal] square root.  */
46094 
46095 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
46096 			 bool recip)
46097 {
46098   rtx x0, e0, e1, e2, e3, mthree, mhalf;
46099   REAL_VALUE_TYPE r;
46100   int unspec;
46101 
46102   x0 = gen_reg_rtx (mode);
46103   e0 = gen_reg_rtx (mode);
46104   e1 = gen_reg_rtx (mode);
46105   e2 = gen_reg_rtx (mode);
46106   e3 = gen_reg_rtx (mode);
46107 
46108   real_from_integer (&r, VOIDmode, -3, SIGNED);
46109   mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
46110 
46111   real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
46112   mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
46113   unspec = UNSPEC_RSQRT;
46114 
46115   if (VECTOR_MODE_P (mode))
46116     {
46117       mthree = ix86_build_const_vector (mode, true, mthree);
46118       mhalf = ix86_build_const_vector (mode, true, mhalf);
46119       /* There is no 512-bit rsqrt.  There is however rsqrt14.  */
46120       if (GET_MODE_SIZE (mode) == 64)
46121 	unspec = UNSPEC_RSQRT14;
46122     }
46123 
46124   /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
46125      rsqrt(a) = -0.5     * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
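  /* Both forms follow from one Newton-Raphson step for f (x) = 1/(x*x) - a:
     x1 = x0 * (3 - a*x0*x0) / 2 = -0.5 * x0 * (a*x0*x0 - 3); the sqrt
     variant folds in sqrt(a) = a * rsqrt(a), hence the extra factor of a.  */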
46126 
46127   a = force_reg (mode, a);
46128 
46129   /* x0 = rsqrt(a) estimate */
46130   emit_insn (gen_rtx_SET (VOIDmode, x0,
46131 			  gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
46132 					  unspec)));
46133 
46134   /* If a == 0.0, filter out the infinite rsqrt estimate so that sqrt (0.0) does not produce a NaN.  */
46135   if (!recip)
46136     {
46137       rtx zero, mask;
46138 
46139       zero = gen_reg_rtx (mode);
46140       mask = gen_reg_rtx (mode);
46141 
46142       zero = force_reg (mode, CONST0_RTX(mode));
46143 
46144       /* Handle masked compare.  */
46145       if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
46146 	{
46147 	  mask = gen_reg_rtx (HImode);
46148 	  /* Imm value 0x4 corresponds to not-equal comparison.  */
46149 	  emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
46150 	  emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
46151 	}
46152       else
46153 	{
46154 	  emit_insn (gen_rtx_SET (VOIDmode, mask,
46155 				  gen_rtx_NE (mode, zero, a)));
46156 
46157 	  emit_insn (gen_rtx_SET (VOIDmode, x0,
46158 				  gen_rtx_AND (mode, x0, mask)));
46159 	}
46160     }
46161 
46162   /* e0 = x0 * a */
46163   emit_insn (gen_rtx_SET (VOIDmode, e0,
46164 			  gen_rtx_MULT (mode, x0, a)));
46165   /* e1 = e0 * x0 */
46166   emit_insn (gen_rtx_SET (VOIDmode, e1,
46167 			  gen_rtx_MULT (mode, e0, x0)));
46168 
46169   /* e2 = e1 - 3. */
46170   mthree = force_reg (mode, mthree);
46171   emit_insn (gen_rtx_SET (VOIDmode, e2,
46172 			  gen_rtx_PLUS (mode, e1, mthree)));
46173 
46174   mhalf = force_reg (mode, mhalf);
46175   if (recip)
46176     /* e3 = -.5 * x0 */
46177     emit_insn (gen_rtx_SET (VOIDmode, e3,
46178 			    gen_rtx_MULT (mode, x0, mhalf)));
46179   else
46180     /* e3 = -.5 * e0 */
46181     emit_insn (gen_rtx_SET (VOIDmode, e3,
46182 			    gen_rtx_MULT (mode, e0, mhalf)));
46183   /* ret = e2 * e3 */
46184   emit_insn (gen_rtx_SET (VOIDmode, res,
46185 			  gen_rtx_MULT (mode, e2, e3)));
46186 }
46187 
46188 #ifdef TARGET_SOLARIS
46189 /* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
46190 
46191 static void
46192 i386_solaris_elf_named_section (const char *name, unsigned int flags,
46193 				tree decl)
46194 {
46195   /* With Binutils 2.15, the "@unwind" marker must be specified on
46196      every occurrence of the ".eh_frame" section, not just the first
46197      one.  */
46198   if (TARGET_64BIT
46199       && strcmp (name, ".eh_frame") == 0)
46200     {
46201       fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
46202 	       flags & SECTION_WRITE ? "aw" : "a");
46203       return;
46204     }
46205 
46206 #ifndef USE_GAS
46207   if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
46208     {
46209       solaris_elf_asm_comdat_section (name, flags, decl);
46210       return;
46211     }
46212 #endif
46213 
46214   default_elf_asm_named_section (name, flags, decl);
46215 }
46216 #endif /* TARGET_SOLARIS */
46217 
46218 /* Return the mangling of TYPE if it is an extended fundamental type.  */
46219 
46220 static const char *
46221 ix86_mangle_type (const_tree type)
46222 {
46223   type = TYPE_MAIN_VARIANT (type);
46224 
46225   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
46226       && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
46227     return NULL;
46228 
46229   switch (TYPE_MODE (type))
46230     {
46231     case TFmode:
46232       /* __float128 is "g".  */
46233       return "g";
46234     case XFmode:
46235       /* "long double" or __float80 is "e".  */
46236       return "e";
46237     default:
46238       return NULL;
46239     }
46240 }
46241 
46242 /* For 32-bit code we can save PIC register setup by using
46243    the __stack_chk_fail_local hidden function instead of calling
46244    __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
46245    register, so it is better to call __stack_chk_fail directly.  */
46246 
46247 static tree ATTRIBUTE_UNUSED
46248 ix86_stack_protect_fail (void)
46249 {
46250   return TARGET_64BIT
46251 	 ? default_external_stack_protect_fail ()
46252 	 : default_hidden_stack_protect_fail ();
46253 }
46254 
46255 /* Select a format to encode pointers in exception handling data.  CODE
46256    is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
46257    true if the symbol may be affected by dynamic relocations.
46258 
46259    ??? All x86 object file formats are capable of representing this.
46260    After all, the relocation needed is the same as for the call insn.
46261    Whether or not a particular assembler allows us to enter such, I
46262    guess we'll have to see.  */
46263 int
46264 asm_preferred_eh_data_format (int code, int global)
46265 {
46266   if (flag_pic)
46267     {
46268       int type = DW_EH_PE_sdata8;
46269       if (!TARGET_64BIT
46270 	  || ix86_cmodel == CM_SMALL_PIC
46271 	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
46272 	type = DW_EH_PE_sdata4;
46273       return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
46274     }
46275   if (ix86_cmodel == CM_SMALL
46276       || (ix86_cmodel == CM_MEDIUM && code))
46277     return DW_EH_PE_udata4;
46278   return DW_EH_PE_absptr;
46279 }
46280 
46281 /* Expand copysign from SIGN to the positive value ABS_VALUE,
46282    storing the result in RESULT.  If MASK is non-null, it must be a mask
46283    with the sign bit cleared (as produced by ix86_expand_sse_fabs).  */
46284 static void
46285 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
46286 {
46287   machine_mode mode = GET_MODE (sign);
46288   rtx sgn = gen_reg_rtx (mode);
46289   if (mask == NULL_RTX)
46290     {
46291       machine_mode vmode;
46292 
46293       if (mode == SFmode)
46294 	vmode = V4SFmode;
46295       else if (mode == DFmode)
46296 	vmode = V2DFmode;
46297       else
46298 	vmode = mode;
46299 
46300       mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
46301       if (!VECTOR_MODE_P (mode))
46302 	{
46303 	  /* We need to generate a scalar mode mask in this case.  */
46304 	  rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46305 	  tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46306 	  mask = gen_reg_rtx (mode);
46307 	  emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
46308 	}
46309     }
46310   else
46311     mask = gen_rtx_NOT (mode, mask);
46312   emit_insn (gen_rtx_SET (VOIDmode, sgn,
46313 			  gen_rtx_AND (mode, mask, sign)));
46314   emit_insn (gen_rtx_SET (VOIDmode, result,
46315 			  gen_rtx_IOR (mode, abs_value, sgn)));
46316 }
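/* Editor's note (illustrative): the sequence above is the classic
   bit-twiddling copysign, roughly

     sgn    = sign & SIGNBIT_MASK;	// isolate the sign bit of SIGN
     result = abs_value | sgn;		// graft it onto the magnitude

   A caller-supplied MASK has the sign bit cleared, hence the NOT before
   the AND.  */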
46317 
46318 /* Expand fabs (OP0) and return a new rtx that holds the result.  The
46319    mask for masking out the sign-bit is stored in *SMASK, if that is
46320    non-null.  */
46321 static rtx
46322 ix86_expand_sse_fabs (rtx op0, rtx *smask)
46323 {
46324   machine_mode vmode, mode = GET_MODE (op0);
46325   rtx xa, mask;
46326 
46327   xa = gen_reg_rtx (mode);
46328   if (mode == SFmode)
46329     vmode = V4SFmode;
46330   else if (mode == DFmode)
46331     vmode = V2DFmode;
46332   else
46333     vmode = mode;
46334   mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
46335   if (!VECTOR_MODE_P (mode))
46336     {
46337       /* We need to generate a scalar mode mask in this case.  */
46338       rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46339       tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46340       mask = gen_reg_rtx (mode);
46341       emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
46342     }
46343   emit_insn (gen_rtx_SET (VOIDmode, xa,
46344 			  gen_rtx_AND (mode, op0, mask)));
46345 
46346   if (smask)
46347     *smask = mask;
46348 
46349   return xa;
46350 }
46351 
46352 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
46353    swapping the operands if SWAP_OPERANDS is true.  The expanded
46354    code is a forward jump to a newly created label in case the
46355    comparison is true.  The generated label rtx is returned.  */
46356 static rtx_code_label *
46357 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
46358                                   bool swap_operands)
46359 {
46360   machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
46361   rtx_code_label *label;
46362   rtx tmp;
46363 
46364   if (swap_operands)
46365     std::swap (op0, op1);
46366 
46367   label = gen_label_rtx ();
46368   tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
46369   emit_insn (gen_rtx_SET (VOIDmode, tmp,
46370 			  gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
46371   tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
46372   tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
46373 			      gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
46374   tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
46375   JUMP_LABEL (tmp) = label;
46376 
46377   return label;
46378 }
46379 
46380 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
46381    using comparison code CODE.  Operands are swapped for the comparison if
46382    SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
46383 static rtx
46384 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
46385 			      bool swap_operands)
46386 {
46387   rtx (*insn)(rtx, rtx, rtx, rtx);
46388   machine_mode mode = GET_MODE (op0);
46389   rtx mask = gen_reg_rtx (mode);
46390 
46391   if (swap_operands)
46392     std::swap (op0, op1);
46393 
46394   insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
46395 
46396   emit_insn (insn (mask, op0, op1,
46397 		   gen_rtx_fmt_ee (code, mode, op0, op1)));
46398   return mask;
46399 }
46400 
46401 /* Generate and return an rtx of mode MODE for 2**n, where n is the number
46402    of mantissa bits of MODE: 52 for DFmode, 23 for SFmode.  */
46403 static rtx
46404 ix86_gen_TWO52 (machine_mode mode)
46405 {
46406   REAL_VALUE_TYPE TWO52r;
46407   rtx TWO52;
46408 
46409   real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
46410   TWO52 = const_double_from_real_value (TWO52r, mode);
46411   TWO52 = force_reg (mode, TWO52);
46412 
46413   return TWO52;
46414 }
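/* Editor's note: the TWO52 trick used by the rounding expanders below
   relies on the mantissa width.  For 0 <= x < 2**52, x + 2**52 lies in a
   binade whose ulp is 1.0, so the fraction bits of x are rounded away in
   the current rounding mode; subtracting 2**52 again is exact and leaves
   x rounded to an integer.  The SFmode analogue uses 2**23.  */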
46415 
46416 /* Expand SSE sequence for computing lround from OP1 storing
46417    into OP0.  */
46418 void
46419 ix86_expand_lround (rtx op0, rtx op1)
46420 {
46421   /* C code for the stuff we're doing below:
46422        tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
46423        return (long)tmp;
46424    */
46425   machine_mode mode = GET_MODE (op1);
46426   const struct real_format *fmt;
46427   REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46428   rtx adj;
46429 
46430   /* load nextafter (0.5, 0.0) */
46431   fmt = REAL_MODE_FORMAT (mode);
46432   real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46433   REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
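  /* Editor's note: nextafter (0.5, 0.0) is used instead of 0.5 so that,
     for the largest representable value just below 0.5, the addition
     cannot round up to 1.0 and make the truncated result off by one.  */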
46434 
46435   /* adj = copysign (0.5, op1) */
46436   adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
46437   ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
46438 
46439   /* adj = op1 + adj */
46440   adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
46441 
46442   /* op0 = (imode)adj */
46443   expand_fix (op0, adj, 0);
46444 }
46445 
46446 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
46447    storing into OPERAND0.  */
46448 void
46449 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
46450 {
46451   /* C code for the stuff we're doing below (for do_floor):
46452 	xi = (long)op1;
46453 	xi -= (double)xi > op1 ? 1 : 0;   (for ceil: xi += (double)xi < op1)
46454 	return xi;
46455    */
46456   machine_mode fmode = GET_MODE (op1);
46457   machine_mode imode = GET_MODE (op0);
46458   rtx ireg, freg, tmp;
46459   rtx_code_label *label;
46460 
46461   /* reg = (long)op1 */
46462   ireg = gen_reg_rtx (imode);
46463   expand_fix (ireg, op1, 0);
46464 
46465   /* freg = (double)reg */
46466   freg = gen_reg_rtx (fmode);
46467   expand_float (freg, ireg, 0);
46468 
46469   /* ireg = (freg > op1) ? ireg - 1 : ireg */
46470   label = ix86_expand_sse_compare_and_jump (UNLE,
46471 					    freg, op1, !do_floor);
46472   tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46473 			     ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46474   emit_move_insn (ireg, tmp);
46475 
46476   emit_label (label);
46477   LABEL_NUSES (label) = 1;
46478 
46479   emit_move_insn (op0, ireg);
46480 }
46481 
46482 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46483    result in OPERAND0.  */
46484 void
46485 ix86_expand_rint (rtx operand0, rtx operand1)
46486 {
46487   /* C code for the stuff we're doing below:
46488 	xa = fabs (operand1);
46489         if (!isless (xa, 2**52))
46490 	  return operand1;
46491         xa = xa + 2**52 - 2**52;
46492         return copysign (xa, operand1);
46493    */
46494   machine_mode mode = GET_MODE (operand0);
46495   rtx res, xa, TWO52, mask;
46496   rtx_code_label *label;
46497 
46498   res = gen_reg_rtx (mode);
46499   emit_move_insn (res, operand1);
46500 
46501   /* xa = abs (operand1) */
46502   xa = ix86_expand_sse_fabs (res, &mask);
46503 
46504   /* if (!isless (xa, TWO52)) goto label; */
46505   TWO52 = ix86_gen_TWO52 (mode);
46506   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46507 
46508   xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46509   xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46510 
46511   ix86_sse_copysign_to_positive (res, xa, res, mask);
46512 
46513   emit_label (label);
46514   LABEL_NUSES (label) = 1;
46515 
46516   emit_move_insn (operand0, res);
46517 }
46518 
46519 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46520    into OPERAND0 without DImode truncation (usable for DFmode on 32-bit targets).  */
46521 void
46522 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46523 {
46524   /* C code for the stuff we expand below.
46525         double xa = fabs (x), x2;
46526         if (!isless (xa, TWO52))
46527           return x;
46528         xa = xa + TWO52 - TWO52;
46529         x2 = copysign (xa, x);
46530      Compensate.  Floor:
46531         if (x2 > x)
46532           x2 -= 1;
46533      Compensate.  Ceil:
46534         if (x2 < x)
46535           x2 -= -1;
46536         return x2;
46537    */
46538   machine_mode mode = GET_MODE (operand0);
46539   rtx xa, TWO52, tmp, one, res, mask;
46540   rtx_code_label *label;
46541 
46542   TWO52 = ix86_gen_TWO52 (mode);
46543 
46544   /* Temporary for holding the result, initialized to the input
46545      operand to ease control flow.  */
46546   res = gen_reg_rtx (mode);
46547   emit_move_insn (res, operand1);
46548 
46549   /* xa = abs (operand1) */
46550   xa = ix86_expand_sse_fabs (res, &mask);
46551 
46552   /* if (!isless (xa, TWO52)) goto label; */
46553   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46554 
46555   /* xa = xa + TWO52 - TWO52; */
46556   xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46557   xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46558 
46559   /* xa = copysign (xa, operand1) */
46560   ix86_sse_copysign_to_positive (xa, xa, res, mask);
46561 
46562   /* generate 1.0 or -1.0 */
46563   one = force_reg (mode,
46564 	           const_double_from_real_value (do_floor
46565 						 ? dconst1 : dconstm1, mode));
46566 
46567   /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46568   tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46569   emit_insn (gen_rtx_SET (VOIDmode, tmp,
46570                           gen_rtx_AND (mode, one, tmp)));
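  /* Editor's note: the compare mask is all-ones or all-zeros per element,
     so ANDing it with the +/-1.0 constant yields +/-1.0 or 0.0; the
     subtraction below then applies the compensation branchlessly.  */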
46571   /* We always need to subtract here to preserve signed zero.  */
46572   tmp = expand_simple_binop (mode, MINUS,
46573 			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46574   emit_move_insn (res, tmp);
46575 
46576   emit_label (label);
46577   LABEL_NUSES (label) = 1;
46578 
46579   emit_move_insn (operand0, res);
46580 }
46581 
46582 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46583    into OPERAND0.  */
46584 void
46585 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46586 {
46587   /* C code for the stuff we expand below.
46588 	double xa = fabs (x), x2;
46589         if (!isless (xa, TWO52))
46590           return x;
46591 	x2 = (double)(long)x;
46592      Compensate.  Floor:
46593 	if (x2 > x)
46594 	  x2 -= 1;
46595      Compensate.  Ceil:
46596 	if (x2 < x)
46597 	  x2 += 1;
46598 	if (HONOR_SIGNED_ZEROS (mode))
46599 	  return copysign (x2, x);
46600 	return x2;
46601    */
46602   machine_mode mode = GET_MODE (operand0);
46603   rtx xa, xi, TWO52, tmp, one, res, mask;
46604   rtx_code_label *label;
46605 
46606   TWO52 = ix86_gen_TWO52 (mode);
46607 
46608   /* Temporary for holding the result, initialized to the input
46609      operand to ease control flow.  */
46610   res = gen_reg_rtx (mode);
46611   emit_move_insn (res, operand1);
46612 
46613   /* xa = abs (operand1) */
46614   xa = ix86_expand_sse_fabs (res, &mask);
46615 
46616   /* if (!isless (xa, TWO52)) goto label; */
46617   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46618 
46619   /* xa = (double)(long)x */
46620   xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46621   expand_fix (xi, res, 0);
46622   expand_float (xa, xi, 0);
46623 
46624   /* generate 1.0 */
46625   one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46626 
46627   /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46628   tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46629   emit_insn (gen_rtx_SET (VOIDmode, tmp,
46630                           gen_rtx_AND (mode, one, tmp)));
46631   tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46632 			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46633   emit_move_insn (res, tmp);
46634 
46635   if (HONOR_SIGNED_ZEROS (mode))
46636     ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46637 
46638   emit_label (label);
46639   LABEL_NUSES (label) = 1;
46640 
46641   emit_move_insn (operand0, res);
46642 }
46643 
46644 /* Expand SSE sequence for computing round from OPERAND1 storing
46645    into OPERAND0.  A sequence that works without relying on DImode
46646    truncation via cvttsd2siq, which is only available on 64-bit targets.  */
46647 void
46648 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46649 {
46650   /* C code for the stuff we expand below.
46651         double xa = fabs (x), dxa, xa2, x2;
46652         if (!isless (xa, TWO52))
46653           return x;
46654      Using the absolute value and copying back sign makes
46655      -0.0 -> -0.0 correct.
46656         xa2 = xa + TWO52 - TWO52;
46657      Compensate.
46658 	dxa = xa2 - xa;
46659         if (dxa <= -0.5)
46660           xa2 += 1;
46661         else if (dxa > 0.5)
46662           xa2 -= 1;
46663         x2 = copysign (xa2, x);
46664         return x2;
46665    */
46666   machine_mode mode = GET_MODE (operand0);
46667   rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46668   rtx_code_label *label;
46669 
46670   TWO52 = ix86_gen_TWO52 (mode);
46671 
46672   /* Temporary for holding the result, initialized to the input
46673      operand to ease control flow.  */
46674   res = gen_reg_rtx (mode);
46675   emit_move_insn (res, operand1);
46676 
46677   /* xa = abs (operand1) */
46678   xa = ix86_expand_sse_fabs (res, &mask);
46679 
46680   /* if (!isless (xa, TWO52)) goto label; */
46681   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46682 
46683   /* xa2 = xa + TWO52 - TWO52; */
46684   xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46685   xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46686 
46687   /* dxa = xa2 - xa; */
46688   dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46689 
46690   /* generate 0.5, 1.0 and -0.5 */
46691   half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46692   one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46693   mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46694 			       0, OPTAB_DIRECT);
46695 
46696   /* Compensate.  */
46698   /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46699   tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46700   emit_insn (gen_rtx_SET (VOIDmode, tmp,
46701                           gen_rtx_AND (mode, one, tmp)));
46702   xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46703   /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46704   tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46705   emit_insn (gen_rtx_SET (VOIDmode, tmp,
46706                           gen_rtx_AND (mode, one, tmp)));
46707   xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46708 
46709   /* res = copysign (xa2, operand1) */
46710   ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46711 
46712   emit_label (label);
46713   LABEL_NUSES (label) = 1;
46714 
46715   emit_move_insn (operand0, res);
46716 }
46717 
46718 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46719    into OPERAND0.  */
46720 void
46721 ix86_expand_trunc (rtx operand0, rtx operand1)
46722 {
46723   /* C code for the SSE variant we expand below.
46724         double xa = fabs (x), x2;
46725         if (!isless (xa, TWO52))
46726           return x;
46727         x2 = (double)(long)x;
46728 	if (HONOR_SIGNED_ZEROS (mode))
46729 	  return copysign (x2, x);
46730 	return x2;
46731    */
46732   machine_mode mode = GET_MODE (operand0);
46733   rtx xa, xi, TWO52, res, mask;
46734   rtx_code_label *label;
46735 
46736   TWO52 = ix86_gen_TWO52 (mode);
46737 
46738   /* Temporary for holding the result, initialized to the input
46739      operand to ease control flow.  */
46740   res = gen_reg_rtx (mode);
46741   emit_move_insn (res, operand1);
46742 
46743   /* xa = abs (operand1) */
46744   xa = ix86_expand_sse_fabs (res, &mask);
46745 
46746   /* if (!isless (xa, TWO52)) goto label; */
46747   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46748 
46749   /* x = (double)(long)x */
46750   xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46751   expand_fix (xi, res, 0);
46752   expand_float (res, xi, 0);
46753 
46754   if (HONOR_SIGNED_ZEROS (mode))
46755     ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46756 
46757   emit_label (label);
46758   LABEL_NUSES (label) = 1;
46759 
46760   emit_move_insn (operand0, res);
46761 }
46762 
46763 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46764    into OPERAND0.  */
46765 void
46766 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46767 {
46768   machine_mode mode = GET_MODE (operand0);
46769   rtx xa, mask, TWO52, one, res, smask, tmp;
46770   rtx_code_label *label;
46771 
46772   /* C code for the SSE variant we expand below.
46773         double xa = fabs (x), xa2, x2;
46774         if (!isless (xa, TWO52))
46775           return x;
46776         xa2 = xa + TWO52 - TWO52;
46777      Compensate:
46778         if (xa2 > xa)
46779           xa2 -= 1.0;
46780         x2 = copysign (xa2, x);
46781         return x2;
46782    */
46783 
46784   TWO52 = ix86_gen_TWO52 (mode);
46785 
46786   /* Temporary for holding the result, initialized to the input
46787      operand to ease control flow.  */
46788   res = gen_reg_rtx (mode);
46789   emit_move_insn (res, operand1);
46790 
46791   /* xa = abs (operand1) */
46792   xa = ix86_expand_sse_fabs (res, &smask);
46793 
46794   /* if (!isless (xa, TWO52)) goto label; */
46795   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46796 
46797   /* res = xa + TWO52 - TWO52; */
46798   tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46799   tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46800   emit_move_insn (res, tmp);
46801 
46802   /* generate 1.0 */
46803   one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46804 
46805   /* Compensate: res = xa2 - (res > xa ? 1 : 0)  */
46806   mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46807   emit_insn (gen_rtx_SET (VOIDmode, mask,
46808                           gen_rtx_AND (mode, mask, one)));
46809   tmp = expand_simple_binop (mode, MINUS,
46810 			     res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46811   emit_move_insn (res, tmp);
46812 
46813   /* res = copysign (res, operand1) */
46814   ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46815 
46816   emit_label (label);
46817   LABEL_NUSES (label) = 1;
46818 
46819   emit_move_insn (operand0, res);
46820 }
46821 
46822 /* Expand SSE sequence for computing round from OPERAND1 storing
46823    into OPERAND0.  */
46824 void
46825 ix86_expand_round (rtx operand0, rtx operand1)
46826 {
46827   /* C code for the stuff we're doing below:
46828         double xa = fabs (x);
46829         if (!isless (xa, TWO52))
46830           return x;
46831         xa = (double)(long)(xa + nextafter (0.5, 0.0));
46832         return copysign (xa, x);
46833    */
46834   machine_mode mode = GET_MODE (operand0);
46835   rtx res, TWO52, xa, xi, half, mask;
46836   rtx_code_label *label;
46837   const struct real_format *fmt;
46838   REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46839 
46840   /* Temporary for holding the result, initialized to the input
46841      operand to ease control flow.  */
46842   res = gen_reg_rtx (mode);
46843   emit_move_insn (res, operand1);
46844 
46845   TWO52 = ix86_gen_TWO52 (mode);
46846   xa = ix86_expand_sse_fabs (res, &mask);
46847   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46848 
46849   /* load nextafter (0.5, 0.0) */
46850   fmt = REAL_MODE_FORMAT (mode);
46851   real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46852   REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46853 
46854   /* xa = xa + 0.5 */
46855   half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46856   xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46857 
46858   /* xa = (double)(int64_t)xa */
46859   xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46860   expand_fix (xi, xa, 0);
46861   expand_float (xa, xi, 0);
46862 
46863   /* res = copysign (xa, operand1) */
46864   ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46865 
46866   emit_label (label);
46867   LABEL_NUSES (label) = 1;
46868 
46869   emit_move_insn (operand0, res);
46870 }
46871 
46872 /* Expand SSE sequence for computing round
46873    from OP1 storing into OP0 using the SSE4.1 round insn.  */
46874 void
46875 ix86_expand_round_sse4 (rtx op0, rtx op1)
46876 {
46877   machine_mode mode = GET_MODE (op0);
46878   rtx e1, e2, res, half;
46879   const struct real_format *fmt;
46880   REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46881   rtx (*gen_copysign) (rtx, rtx, rtx);
46882   rtx (*gen_round) (rtx, rtx, rtx);
46883 
46884   switch (mode)
46885     {
46886     case SFmode:
46887       gen_copysign = gen_copysignsf3;
46888       gen_round = gen_sse4_1_roundsf2;
46889       break;
46890     case DFmode:
46891       gen_copysign = gen_copysigndf3;
46892       gen_round = gen_sse4_1_rounddf2;
46893       break;
46894     default:
46895       gcc_unreachable ();
46896     }
46897 
46898   /* round (a) = trunc (a + copysign (0.5, a)) */
46899 
46900   /* load nextafter (0.5, 0.0) */
46901   fmt = REAL_MODE_FORMAT (mode);
46902   real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46903   REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46904   half = const_double_from_real_value (pred_half, mode);
46905 
46906   /* e1 = copysign (0.5, op1) */
46907   e1 = gen_reg_rtx (mode);
46908   emit_insn (gen_copysign (e1, half, op1));
46909 
46910   /* e2 = op1 + e1 */
46911   e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46912 
46913   /* res = trunc (e2) */
46914   res = gen_reg_rtx (mode);
46915   emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46916 
46917   emit_move_insn (op0, res);
46918 }
46919 
46920 
46921 /* Table of valid machine attributes.  */
46922 static const struct attribute_spec ix86_attribute_table[] =
46923 {
46924   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46925        affects_type_identity } */
46926   /* Stdcall attribute says callee is responsible for popping arguments
46927      if they are not variable.  */
46928   { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute,
46929     true },
46930   /* Fastcall attribute says callee is responsible for popping arguments
46931      if they are not variable.  */
46932   { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
46933     true },
46934   /* Thiscall attribute says callee is responsible for popping arguments
46935      if they are not variable.  */
46936   { "thiscall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
46937     true },
46938   /* Cdecl attribute says the callee is a normal C declaration.  */
46939   { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute,
46940     true },
46941   /* Regparm attribute specifies how many integer arguments are to be
46942      passed in registers.  */
46943   { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute,
46944     true },
46945   /* Sseregparm attribute says we are using x86_64 calling conventions
46946      for FP arguments.  */
46947   { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46948     true },
46949   /* The transactional memory builtins are implicitly regparm or fastcall
46950      depending on the ABI.  Override the generic do-nothing attribute that
46951      these builtins were declared with.  */
46952   { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46953     true },
46954   /* force_align_arg_pointer says this function realigns the stack at entry.  */
46955   { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46956     false, true,  true, ix86_handle_force_align_arg_pointer_attribute, false },
46957 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46958   { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46959   { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46960   { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute,
46961     false },
46962 #endif
46963   { "ms_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute,
46964     false },
46965   { "gcc_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute,
46966     false },
46967 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46968   SUBTARGET_ATTRIBUTE_TABLE,
46969 #endif
46970   /* ms_abi and sysv_abi calling convention function attributes.  */
46971   { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46972   { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46973   { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46974     false },
46975   { "callee_pop_aggregate_return", 1, 1, false, true, true,
46976     ix86_handle_callee_pop_aggregate_return, true },
46977   /* End element.  */
46978   { NULL,        0, 0, false, false, false, NULL, false }
46979 };
46980 
46981 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
46982 static int
46983 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46984                                  tree vectype, int)
46985 {
46986   unsigned elements;
46987 
46988   switch (type_of_cost)
46989     {
46990       case scalar_stmt:
46991         return ix86_cost->scalar_stmt_cost;
46992 
46993       case scalar_load:
46994         return ix86_cost->scalar_load_cost;
46995 
46996       case scalar_store:
46997         return ix86_cost->scalar_store_cost;
46998 
46999       case vector_stmt:
47000         return ix86_cost->vec_stmt_cost;
47001 
47002       case vector_load:
47003         return ix86_cost->vec_align_load_cost;
47004 
47005       case vector_store:
47006         return ix86_cost->vec_store_cost;
47007 
47008       case vec_to_scalar:
47009         return ix86_cost->vec_to_scalar_cost;
47010 
47011       case scalar_to_vec:
47012         return ix86_cost->scalar_to_vec_cost;
47013 
47014       case unaligned_load:
47015       case unaligned_store:
47016         return ix86_cost->vec_unalign_load_cost;
47017 
47018       case cond_branch_taken:
47019         return ix86_cost->cond_taken_branch_cost;
47020 
47021       case cond_branch_not_taken:
47022         return ix86_cost->cond_not_taken_branch_cost;
47023 
47024       case vec_perm:
47025       case vec_promote_demote:
47026         return ix86_cost->vec_stmt_cost;
47027 
47028       case vec_construct:
47029 	elements = TYPE_VECTOR_SUBPARTS (vectype);
47030 	return elements / 2 + 1;
47031 
47032       default:
47033         gcc_unreachable ();
47034     }
47035 }
47036 
47037 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
47038    insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
47039    insn every time.  */
47040 
47041 static GTY(()) rtx_insn *vselect_insn;
47042 
47043 /* Initialize vselect_insn.  */
47044 
47045 static void
47046 init_vselect_insn (void)
47047 {
47048   unsigned i;
47049   rtx x;
47050 
47051   x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
47052   for (i = 0; i < MAX_VECT_LEN; ++i)
47053     XVECEXP (x, 0, i) = const0_rtx;
47054   x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
47055 							const0_rtx), x);
47056   x = gen_rtx_SET (VOIDmode, const0_rtx, x);
47057   start_sequence ();
47058   vselect_insn = emit_insn (x);
47059   end_sequence ();
47060 }
47061 
47062 /* Construct (set target (vec_select op0 (parallel perm))) and
47063    return true if that's a valid instruction in the active ISA.  */
47064 
47065 static bool
47066 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
47067 		unsigned nelt, bool testing_p)
47068 {
47069   unsigned int i;
47070   rtx x, save_vconcat;
47071   int icode;
47072 
47073   if (vselect_insn == NULL_RTX)
47074     init_vselect_insn ();
47075 
47076   x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
47077   PUT_NUM_ELEM (XVEC (x, 0), nelt);
47078   for (i = 0; i < nelt; ++i)
47079     XVECEXP (x, 0, i) = GEN_INT (perm[i]);
47080   save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
47081   XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
47082   PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
47083   SET_DEST (PATTERN (vselect_insn)) = target;
47084   icode = recog_memoized (vselect_insn);
47085 
47086   if (icode >= 0 && !testing_p)
47087     emit_insn (copy_rtx (PATTERN (vselect_insn)));
47088 
47089   SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
47090   XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
47091   INSN_CODE (vselect_insn) = -1;
47092 
47093   return icode >= 0;
47094 }
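/* Editor's note: expand_vselect works by mutating the single cached
   vselect_insn pattern in place, asking recog_memoized whether the result
   matches a real instruction, and then restoring the scratch operands and
   INSN_CODE, so no throw-away rtl is allocated on the failure path.  */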
47095 
47096 /* Similar, but generate a vec_concat from op0 and op1 as well.  */
47097 
47098 static bool
47099 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
47100 			const unsigned char *perm, unsigned nelt,
47101 			bool testing_p)
47102 {
47103   machine_mode v2mode;
47104   rtx x;
47105   bool ok;
47106 
47107   if (vselect_insn == NULL_RTX)
47108     init_vselect_insn ();
47109 
47110   v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
47111   x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
47112   PUT_MODE (x, v2mode);
47113   XEXP (x, 0) = op0;
47114   XEXP (x, 1) = op1;
47115   ok = expand_vselect (target, x, perm, nelt, testing_p);
47116   XEXP (x, 0) = const0_rtx;
47117   XEXP (x, 1) = const0_rtx;
47118   return ok;
47119 }
47120 
47121 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
47122    in terms of blendp[sd] / pblendw / pblendvb / vpblendd.  */
47123 
47124 static bool
47125 expand_vec_perm_blend (struct expand_vec_perm_d *d)
47126 {
47127   machine_mode mmode, vmode = d->vmode;
47128   unsigned i, mask, nelt = d->nelt;
47129   rtx target, op0, op1, maskop, x;
47130   rtx rperm[32], vperm;
47131 
47132   if (d->one_operand_p)
47133     return false;
47134   if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
47135       && (TARGET_AVX512BW
47136 	  || GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4))
47137     ;
47138   else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47139     ;
47140   else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47141     ;
47142   else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47143     ;
47144   else
47145     return false;
47146 
47147   /* This is a blend, not a permute.  Elements must stay in their
47148      respective lanes.  */
47149   for (i = 0; i < nelt; ++i)
47150     {
47151       unsigned e = d->perm[i];
47152       if (!(e == i || e == i + nelt))
47153 	return false;
47154     }
47155 
47156   if (d->testing_p)
47157     return true;
47158 
47159   /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
47160      decision should be extracted elsewhere, so that we only try that
47161      sequence once all budget==3 options have been tried.  */
47162   target = d->target;
47163   op0 = d->op0;
47164   op1 = d->op1;
47165   mask = 0;
47166 
47167   switch (vmode)
47168     {
47169     case V8DFmode:
47170     case V16SFmode:
47171     case V4DFmode:
47172     case V8SFmode:
47173     case V2DFmode:
47174     case V4SFmode:
47175     case V8HImode:
47176     case V8SImode:
47177     case V32HImode:
47178     case V64QImode:
47179     case V16SImode:
47180     case V8DImode:
47181       for (i = 0; i < nelt; ++i)
47182 	mask |= (d->perm[i] >= nelt) << i;
47183       break;
47184 
47185     case V2DImode:
47186       for (i = 0; i < 2; ++i)
47187 	mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
47188       vmode = V8HImode;
47189       goto do_subreg;
47190 
47191     case V4SImode:
47192       for (i = 0; i < 4; ++i)
47193 	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
47194       vmode = V8HImode;
47195       goto do_subreg;
47196 
47197     case V16QImode:
47198       /* See if bytes move in pairs so we can use pblendw with
47199 	 an immediate argument, rather than pblendvb with a vector
47200 	 argument.  */
47201       for (i = 0; i < 16; i += 2)
47202 	if (d->perm[i] + 1 != d->perm[i + 1])
47203 	  {
47204 	  use_pblendvb:
47205 	    for (i = 0; i < nelt; ++i)
47206 	      rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
47207 
47208 	  finish_pblendvb:
47209 	    vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
47210 	    vperm = force_reg (vmode, vperm);
47211 
47212 	    if (GET_MODE_SIZE (vmode) == 16)
47213 	      emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
47214 	    else
47215 	      emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
47216 	    if (target != d->target)
47217 	      emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47218 	    return true;
47219 	  }
47220 
47221       for (i = 0; i < 8; ++i)
47222 	mask |= (d->perm[i * 2] >= 16) << i;
47223       vmode = V8HImode;
47224       /* FALLTHRU */
47225 
47226     do_subreg:
47227       target = gen_reg_rtx (vmode);
47228       op0 = gen_lowpart (vmode, op0);
47229       op1 = gen_lowpart (vmode, op1);
47230       break;
47231 
47232     case V32QImode:
47233       /* See if bytes move in pairs.  If not, vpblendvb must be used.  */
47234       for (i = 0; i < 32; i += 2)
47235 	if (d->perm[i] + 1 != d->perm[i + 1])
47236 	  goto use_pblendvb;
47237       /* See if bytes move in quadruplets.  If yes, vpblendd
47238 	 with immediate can be used.  */
47239       for (i = 0; i < 32; i += 4)
47240 	if (d->perm[i] + 2 != d->perm[i + 2])
47241 	  break;
47242       if (i < 32)
47243 	{
47244 	  /* See if bytes move the same in both lanes.  If yes,
47245 	     vpblendw with immediate can be used.  */
47246 	  for (i = 0; i < 16; i += 2)
47247 	    if (d->perm[i] + 16 != d->perm[i + 16])
47248 	      goto use_pblendvb;
47249 
47250 	  /* Use vpblendw.  */
47251 	  for (i = 0; i < 16; ++i)
47252 	    mask |= (d->perm[i * 2] >= 32) << i;
47253 	  vmode = V16HImode;
47254 	  goto do_subreg;
47255 	}
47256 
47257       /* Use vpblendd.  */
47258       for (i = 0; i < 8; ++i)
47259 	mask |= (d->perm[i * 4] >= 32) << i;
47260       vmode = V8SImode;
47261       goto do_subreg;
47262 
47263     case V16HImode:
47264       /* See if words move in pairs.  If yes, vpblendd can be used.  */
47265       for (i = 0; i < 16; i += 2)
47266 	if (d->perm[i] + 1 != d->perm[i + 1])
47267 	  break;
47268       if (i < 16)
47269 	{
47270 	  /* See if words move the same in both lanes.  If not,
47271 	     vpblendvb must be used.  */
47272 	  for (i = 0; i < 8; i++)
47273 	    if (d->perm[i] + 8 != d->perm[i + 8])
47274 	      {
47275 		/* Use vpblendvb.  */
47276 		for (i = 0; i < 32; ++i)
47277 		  rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
47278 
47279 		vmode = V32QImode;
47280 		nelt = 32;
47281 		target = gen_reg_rtx (vmode);
47282 		op0 = gen_lowpart (vmode, op0);
47283 		op1 = gen_lowpart (vmode, op1);
47284 		goto finish_pblendvb;
47285 	      }
47286 
47287 	  /* Use vpblendw.  */
47288 	  for (i = 0; i < 16; ++i)
47289 	    mask |= (d->perm[i] >= 16) << i;
47290 	  break;
47291 	}
47292 
47293       /* Use vpblendd.  */
47294       for (i = 0; i < 8; ++i)
47295 	mask |= (d->perm[i * 2] >= 16) << i;
47296       vmode = V8SImode;
47297       goto do_subreg;
47298 
47299     case V4DImode:
47300       /* Use vpblendd.  */
47301       for (i = 0; i < 4; ++i)
47302 	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
47303       vmode = V8SImode;
47304       goto do_subreg;
47305 
47306     default:
47307       gcc_unreachable ();
47308     }
47309 
47310   switch (vmode)
47311     {
47312     case V8DFmode:
47313     case V8DImode:
47314       mmode = QImode;
47315       break;
47316     case V16SFmode:
47317     case V16SImode:
47318       mmode = HImode;
47319       break;
47320     case V32HImode:
47321       mmode = SImode;
47322       break;
47323     case V64QImode:
47324       mmode = DImode;
47325       break;
47326     default:
47327       mmode = VOIDmode;
47328     }
47329 
47330   if (mmode != VOIDmode)
47331     maskop = force_reg (mmode, gen_int_mode (mask, mmode));
47332   else
47333     maskop = GEN_INT (mask);
47334 
47335   /* This matches five different patterns with the different modes.  */
47336   x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
47337   x = gen_rtx_SET (VOIDmode, target, x);
47338   emit_insn (x);
47339   if (target != d->target)
47340     emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47341 
47342   return true;
47343 }
47344 
47345 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
47346    in terms of the variable form of vpermilps.
47347 
47348    Note that we will have already failed the immediate input vpermilps,
47349    which requires that the high and low part shuffle be identical; the
47350    variable form doesn't require that.  */
47351 
47352 static bool
47353 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
47354 {
47355   rtx rperm[8], vperm;
47356   unsigned i;
47357 
47358   if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
47359     return false;
47360 
47361   /* We can only permute within each 128-bit lane.  */
47362   for (i = 0; i < 8; ++i)
47363     {
47364       unsigned e = d->perm[i];
47365       if (i < 4 ? e >= 4 : e < 4)
47366 	return false;
47367     }
47368 
47369   if (d->testing_p)
47370     return true;
47371 
47372   for (i = 0; i < 8; ++i)
47373     {
47374       unsigned e = d->perm[i];
47375 
47376       /* Within each 128-bit lane, the elements of op0 are numbered
47377 	 from 0 and the elements of op1 are numbered from 4.  */
47378       if (e >= 8 + 4)
47379 	e -= 8;
47380       else if (e >= 4)
47381 	e -= 4;
47382 
47383       rperm[i] = GEN_INT (e);
47384     }
47385 
47386   vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
47387   vperm = force_reg (V8SImode, vperm);
47388   emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
47389 
47390   return true;
47391 }
47392 
47393 /* Return true if permutation D can be performed as VMODE permutation
47394    instead.  */
47395 
47396 static bool
47397 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
47398 {
47399   unsigned int i, j, chunk;
47400 
47401   if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
47402       || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
47403       || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
47404     return false;
47405 
47406   if (GET_MODE_NUNITS (vmode) >= d->nelt)
47407     return true;
47408 
47409   chunk = d->nelt / GET_MODE_NUNITS (vmode);
47410   for (i = 0; i < d->nelt; i += chunk)
47411     if (d->perm[i] & (chunk - 1))
47412       return false;
47413     else
47414       for (j = 1; j < chunk; ++j)
47415 	if (d->perm[i] + j != d->perm[i + j])
47416 	  return false;
47417 
47418   return true;
47419 }
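/* Editor's example (illustrative): the V16QImode permutation
     { 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 }
   moves bytes in aligned groups of four, so valid_perm_using_mode_p
   accepts V4SImode for it; the equivalent V4SImode permutation is
   { 1, 0, 3, 2 }.  */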
47420 
47421 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
47422    in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128.  */
47423 
47424 static bool
47425 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
47426 {
47427   unsigned i, nelt, eltsz, mask;
47428   unsigned char perm[64];
47429   machine_mode vmode = V16QImode;
47430   rtx rperm[64], vperm, target, op0, op1;
47431 
47432   nelt = d->nelt;
47433 
47434   if (!d->one_operand_p)
47435     {
47436       if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
47437 	{
47438 	  if (TARGET_AVX2
47439 	      && valid_perm_using_mode_p (V2TImode, d))
47440 	    {
47441 	      if (d->testing_p)
47442 		return true;
47443 
47444 	      /* Use vperm2i128 insn.  The pattern uses
47445 		 V4DImode instead of V2TImode.  */
47446 	      target = d->target;
47447 	      if (d->vmode != V4DImode)
47448 		target = gen_reg_rtx (V4DImode);
47449 	      op0 = gen_lowpart (V4DImode, d->op0);
47450 	      op1 = gen_lowpart (V4DImode, d->op1);
47451 	      rperm[0]
47452 		= GEN_INT ((d->perm[0] / (nelt / 2))
47453 			   | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
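	      /* Editor's note: vperm2i128's imm8 picks one of the four
		 128-bit halves of op0:op1 for each result lane; bits 0-1
		 select the low lane and bits 4-5 (hence the `* 16') the
		 high lane.  d->perm[0] / (nelt / 2) is the index of the
		 half supplying element 0, likewise for element nelt/2.  */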
47454 	      emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
47455 	      if (target != d->target)
47456 		emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47457 	      return true;
47458 	    }
47459 	  return false;
47460 	}
47461     }
47462   else
47463     {
47464       if (GET_MODE_SIZE (d->vmode) == 16)
47465 	{
47466 	  if (!TARGET_SSSE3)
47467 	    return false;
47468 	}
47469       else if (GET_MODE_SIZE (d->vmode) == 32)
47470 	{
47471 	  if (!TARGET_AVX2)
47472 	    return false;
47473 
47474 	  /* V4DImode should be already handled through
47475 	     expand_vselect by vpermq instruction.  */
47476 	  gcc_assert (d->vmode != V4DImode);
47477 
47478 	  vmode = V32QImode;
47479 	  if (d->vmode == V8SImode
47480 	      || d->vmode == V16HImode
47481 	      || d->vmode == V32QImode)
47482 	    {
47483 	      /* First see if vpermq can be used for
47484 		 V8SImode/V16HImode/V32QImode.  */
47485 	      if (valid_perm_using_mode_p (V4DImode, d))
47486 		{
47487 		  for (i = 0; i < 4; i++)
47488 		    perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47489 		  if (d->testing_p)
47490 		    return true;
47491 		  target = gen_reg_rtx (V4DImode);
47492 		  if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47493 				      perm, 4, false))
47494 		    {
47495 		      emit_move_insn (d->target,
47496 				      gen_lowpart (d->vmode, target));
47497 		      return true;
47498 		    }
47499 		  return false;
47500 		}
47501 
47502 	      /* Next see if vpermd can be used.  */
47503 	      if (valid_perm_using_mode_p (V8SImode, d))
47504 		vmode = V8SImode;
47505 	    }
47506 	  /* Or if vpermps can be used.  */
47507 	  else if (d->vmode == V8SFmode)
47508 	    vmode = V8SImode;
47509 
47510 	  if (vmode == V32QImode)
47511 	    {
47512 	      /* vpshufb only works within lanes; it cannot
47513 		 shuffle bytes between lanes.  */
47514 	      for (i = 0; i < nelt; ++i)
47515 		if ((d->perm[i] ^ i) & (nelt / 2))
47516 		  return false;
47517 	    }
47518 	}
47519       else if (GET_MODE_SIZE (d->vmode) == 64)
47520 	{
47521 	  if (!TARGET_AVX512BW)
47522 	    return false;
47523 
47524 	  /* If vpermq didn't work, vpshufb won't work either.  */
47525 	  if (d->vmode == V8DFmode || d->vmode == V8DImode)
47526 	    return false;
47527 
47528 	  vmode = V64QImode;
47529 	  if (d->vmode == V16SImode
47530 	      || d->vmode == V32HImode
47531 	      || d->vmode == V64QImode)
47532 	    {
47533 	      /* First see if vpermq can be used for
47534 		 V16SImode/V32HImode/V64QImode.  */
47535 	      if (valid_perm_using_mode_p (V8DImode, d))
47536 		{
47537 		  for (i = 0; i < 8; i++)
47538 		    perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47539 		  if (d->testing_p)
47540 		    return true;
47541 		  target = gen_reg_rtx (V8DImode);
47542 		  if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47543 				      perm, 8, false))
47544 		    {
47545 		      emit_move_insn (d->target,
47546 				      gen_lowpart (d->vmode, target));
47547 		      return true;
47548 		    }
47549 		  return false;
47550 		}
47551 
47552 	      /* Next see if vpermd can be used.  */
47553 	      if (valid_perm_using_mode_p (V16SImode, d))
47554 		vmode = V16SImode;
47555 	    }
47556 	  /* Or if vpermps can be used.  */
47557 	  else if (d->vmode == V16SFmode)
47558 	    vmode = V16SImode;
47559 	  if (vmode == V64QImode)
47560 	    {
47561 	      /* vpshufb only works within lanes; it cannot
47562 		 shuffle bytes between lanes.  */
47563 	      for (i = 0; i < nelt; ++i)
47564 		if ((d->perm[i] ^ i) & (nelt / 4))
47565 		  return false;
47566 	    }
47567 	}
47568       else
47569 	return false;
47570     }
47571 
47572   if (d->testing_p)
47573     return true;
47574 
47575   if (vmode == V8SImode)
47576     for (i = 0; i < 8; ++i)
47577       rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47578   else if (vmode == V16SImode)
47579     for (i = 0; i < 16; ++i)
47580       rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
47581   else
47582     {
47583       eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47584       if (!d->one_operand_p)
47585 	mask = 2 * nelt - 1;
47586       else if (vmode == V16QImode)
47587 	mask = nelt - 1;
47588       else if (vmode == V64QImode)
47589 	mask = nelt / 4 - 1;
47590       else
47591 	mask = nelt / 2 - 1;
47592 
47593       for (i = 0; i < nelt; ++i)
47594 	{
47595 	  unsigned j, e = d->perm[i] & mask;
47596 	  for (j = 0; j < eltsz; ++j)
47597 	    rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47598 	}
47599     }
47600 
47601   vperm = gen_rtx_CONST_VECTOR (vmode,
47602 				gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47603   vperm = force_reg (vmode, vperm);
47604 
47605   target = d->target;
47606   if (d->vmode != vmode)
47607     target = gen_reg_rtx (vmode);
47608   op0 = gen_lowpart (vmode, d->op0);
47609   if (d->one_operand_p)
47610     {
47611       if (vmode == V16QImode)
47612 	emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47613       else if (vmode == V32QImode)
47614 	emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47615       else if (vmode == V64QImode)
47616 	emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47617       else if (vmode == V8SFmode)
47618 	emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47619       else if (vmode == V8SImode)
47620 	emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47621       else if (vmode == V16SFmode)
47622 	emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47623       else if (vmode == V16SImode)
47624 	emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47625       else
47626 	gcc_unreachable ();
47627     }
47628   else
47629     {
47630       op1 = gen_lowpart (vmode, d->op1);
47631       emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47632     }
47633   if (target != d->target)
47634     emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47635 
47636   return true;
47637 }
47638 
47639 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
47640    in a single instruction.  */
47641 
47642 static bool
47643 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47644 {
47645   unsigned i, nelt = d->nelt;
47646   unsigned char perm2[MAX_VECT_LEN];
47647 
47648   /* Check plain VEC_SELECT first, because AVX has instructions that could
47649      match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47650      input where SEL+CONCAT may not.  */
47651   if (d->one_operand_p)
47652     {
47653       int mask = nelt - 1;
47654       bool identity_perm = true;
47655       bool broadcast_perm = true;
47656 
47657       for (i = 0; i < nelt; i++)
47658 	{
47659 	  perm2[i] = d->perm[i] & mask;
47660 	  if (perm2[i] != i)
47661 	    identity_perm = false;
47662 	  if (perm2[i])
47663 	    broadcast_perm = false;
47664 	}
47665 
47666       if (identity_perm)
47667 	{
47668 	  if (!d->testing_p)
47669 	    emit_move_insn (d->target, d->op0);
47670 	  return true;
47671 	}
47672       else if (broadcast_perm && TARGET_AVX2)
47673 	{
47674 	  /* Use vpbroadcast{b,w,d}.  */
47675 	  rtx (*gen) (rtx, rtx) = NULL;
47676 	  switch (d->vmode)
47677 	    {
47678 	    case V64QImode:
47679 	      if (TARGET_AVX512BW)
47680 		gen = gen_avx512bw_vec_dupv64qi_1;
47681 	      break;
47682 	    case V32QImode:
47683 	      gen = gen_avx2_pbroadcastv32qi_1;
47684 	      break;
47685 	    case V32HImode:
47686 	      if (TARGET_AVX512BW)
47687 		gen = gen_avx512bw_vec_dupv32hi_1;
47688 	      break;
47689 	    case V16HImode:
47690 	      gen = gen_avx2_pbroadcastv16hi_1;
47691 	      break;
47692 	    case V16SImode:
47693 	      if (TARGET_AVX512F)
47694 		gen = gen_avx512f_vec_dupv16si_1;
47695 	      break;
47696 	    case V8SImode:
47697 	      gen = gen_avx2_pbroadcastv8si_1;
47698 	      break;
47699 	    case V16QImode:
47700 	      gen = gen_avx2_pbroadcastv16qi;
47701 	      break;
47702 	    case V8HImode:
47703 	      gen = gen_avx2_pbroadcastv8hi;
47704 	      break;
47705 	    case V16SFmode:
47706 	      if (TARGET_AVX512F)
47707 		gen = gen_avx512f_vec_dupv16sf_1;
47708 	      break;
47709 	    case V8SFmode:
47710 	      gen = gen_avx2_vec_dupv8sf_1;
47711 	      break;
47712 	    case V8DFmode:
47713 	      if (TARGET_AVX512F)
47714 		gen = gen_avx512f_vec_dupv8df_1;
47715 	      break;
47716 	    case V8DImode:
47717 	      if (TARGET_AVX512F)
47718 		gen = gen_avx512f_vec_dupv8di_1;
47719 	      break;
47720 	    /* For other modes prefer other shuffles this function creates.  */
47721 	    default: break;
47722 	    }
47723 	  if (gen != NULL)
47724 	    {
47725 	      if (!d->testing_p)
47726 		emit_insn (gen (d->target, d->op0));
47727 	      return true;
47728 	    }
47729 	}
47730 
47731       if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47732 	return true;
47733 
47734       /* There are plenty of patterns in sse.md that are written for
47735 	 SEL+CONCAT and are not replicated for a single op.  Perhaps
47736 	 that should be changed, to avoid the nastiness here.  */
47737 
47738       /* Recognize interleave style patterns, which means incrementing
47739 	 every other permutation operand.  */
47740       for (i = 0; i < nelt; i += 2)
47741 	{
47742 	  perm2[i] = d->perm[i] & mask;
47743 	  perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47744 	}
47745       if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47746 				  d->testing_p))
47747 	return true;
47748 
47749       /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
47750       if (nelt >= 4)
47751 	{
47752 	  for (i = 0; i < nelt; i += 4)
47753 	    {
47754 	      perm2[i + 0] = d->perm[i + 0] & mask;
47755 	      perm2[i + 1] = d->perm[i + 1] & mask;
47756 	      perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47757 	      perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47758 	    }
47759 
47760 	  if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47761 				      d->testing_p))
47762 	    return true;
47763 	}
47764     }
47765 
47766   /* Finally, try the fully general two operand permute.  */
47767   if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47768 			      d->testing_p))
47769     return true;
47770 
47771   /* Recognize interleave style patterns with reversed operands.  */
47772   if (!d->one_operand_p)
47773     {
47774       for (i = 0; i < nelt; ++i)
47775 	{
47776 	  unsigned e = d->perm[i];
47777 	  if (e >= nelt)
47778 	    e -= nelt;
47779 	  else
47780 	    e += nelt;
47781 	  perm2[i] = e;
47782 	}
47783 
47784       if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47785 				  d->testing_p))
47786 	return true;
47787     }
47788 
47789   /* Try the SSE4.1 blend variable merge instructions.  */
47790   if (expand_vec_perm_blend (d))
47791     return true;
47792 
47793   /* Try one of the AVX vpermil variable permutations.  */
47794   if (expand_vec_perm_vpermil (d))
47795     return true;
47796 
47797   /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47798      vpshufb, vpermd, vpermps or vpermq variable permutation.  */
47799   if (expand_vec_perm_pshufb (d))
47800     return true;
47801 
47802   /* Try the AVX2 vpalignr instruction.  */
47803   if (expand_vec_perm_palignr (d, true))
47804     return true;
47805 
47806   /* Try the AVX512F vpermi2 instructions.  */
47807   if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47808     return true;
47809 
47810   return false;
47811 }
47812 
47813 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
47814    in terms of a pair of pshuflw + pshufhw instructions.  */
47815 
47816 static bool
47817 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47818 {
47819   unsigned char perm2[MAX_VECT_LEN];
47820   unsigned i;
47821   bool ok;
47822 
47823   if (d->vmode != V8HImode || !d->one_operand_p)
47824     return false;
47825 
47826   /* The two permutations only operate in 64-bit lanes.  */
47827   for (i = 0; i < 4; ++i)
47828     if (d->perm[i] >= 4)
47829       return false;
47830   for (i = 4; i < 8; ++i)
47831     if (d->perm[i] < 4)
47832       return false;
47833 
47834   if (d->testing_p)
47835     return true;
47836 
47837   /* Emit the pshuflw.  */
47838   memcpy (perm2, d->perm, 4);
47839   for (i = 4; i < 8; ++i)
47840     perm2[i] = i;
47841   ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47842   gcc_assert (ok);
47843 
47844   /* Emit the pshufhw.  */
47845   memcpy (perm2 + 4, d->perm + 4, 4);
47846   for (i = 0; i < 4; ++i)
47847     perm2[i] = i;
47848   ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47849   gcc_assert (ok);
47850 
47851   return true;
47852 }
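
/* Illustrative sketch (not part of the original source): how a V8HImode
   permutation whose low four indices stay below 4 and whose high four
   stay at or above 4 decomposes into the two 2-bit-packed immediates
   taken by pshuflw and pshufhw.  The helper name and the sample
   permutation are hypothetical.  */
#if 0
#include <stdio.h>

/* Pack a half-permutation {p0,p1,p2,p3} (each value 0..3) into the
   imm8 operand shared by pshuflw/pshufhw.  */
static unsigned
pack_pshuf_imm8 (const unsigned char *p)
{
  return p[0] | (p[1] << 2) | (p[2] << 4) | (p[3] << 6);
}

int
main (void)
{
  /* perm = {2,0,3,1, 6,4,7,5}: pshuflw handles the low half as-is,
     pshufhw handles the high half rebased to 0..3.  */
  unsigned char low[4] = { 2, 0, 3, 1 };
  unsigned char high[4] = { 6 - 4, 4 - 4, 7 - 4, 5 - 4 };
  printf ("pshuflw imm8 = 0x%02x, pshufhw imm8 = 0x%02x\n",
	  pack_pshuf_imm8 (low), pack_pshuf_imm8 (high));
  return 0;
}
#endif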
47853 
47854 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
47855    the permutation using the SSSE3 palignr instruction.  This succeeds
47856    when all of the elements in PERM fit within one vector and we merely
47857    need to shift them down so that a single vector permutation has a
47858    chance to succeed.  If SINGLE_INSN_ONLY_P, succeed if only
47859    the vpalignr instruction itself can perform the requested permutation.  */
47860 
47861 static bool
47862 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47863 {
47864   unsigned i, nelt = d->nelt;
47865   unsigned min, max, minswap, maxswap;
47866   bool in_order, ok, swap = false;
47867   rtx shift, target;
47868   struct expand_vec_perm_d dcopy;
47869 
47870   /* Even with AVX, palignr only operates on 128-bit vectors;
47871      with AVX2, palignr operates within each of the two 128-bit lanes.  */
47872   if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47873       && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47874     return false;
47875 
47876   min = 2 * nelt;
47877   max = 0;
47878   minswap = 2 * nelt;
47879   maxswap = 0;
47880   for (i = 0; i < nelt; ++i)
47881     {
47882       unsigned e = d->perm[i];
47883       unsigned eswap = d->perm[i] ^ nelt;
47884       if (GET_MODE_SIZE (d->vmode) == 32)
47885 	{
47886 	  e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47887 	  eswap = e ^ (nelt / 2);
47888 	}
47889       if (e < min)
47890 	min = e;
47891       if (e > max)
47892 	max = e;
47893       if (eswap < minswap)
47894 	minswap = eswap;
47895       if (eswap > maxswap)
47896 	maxswap = eswap;
47897     }
47898   if (min == 0
47899       || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47900     {
47901       if (d->one_operand_p
47902 	  || minswap == 0
47903 	  || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47904 				   ? nelt / 2 : nelt))
47905 	return false;
47906       swap = true;
47907       min = minswap;
47908       max = maxswap;
47909     }
47910 
47911   /* Given that we have SSSE3, we know we'll be able to implement the
47912      single operand permutation after the palignr with pshufb for
47913      128-bit vectors.  If SINGLE_INSN_ONLY_P, in_order has to be computed
47914      first.  */
47915   if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47916     return true;
47917 
47918   dcopy = *d;
47919   if (swap)
47920     {
47921       dcopy.op0 = d->op1;
47922       dcopy.op1 = d->op0;
47923       for (i = 0; i < nelt; ++i)
47924 	dcopy.perm[i] ^= nelt;
47925     }
47926 
47927   in_order = true;
47928   for (i = 0; i < nelt; ++i)
47929     {
47930       unsigned e = dcopy.perm[i];
47931       if (GET_MODE_SIZE (d->vmode) == 32
47932 	  && e >= nelt
47933 	  && (e & (nelt / 2 - 1)) < min)
47934 	e = e - min - (nelt / 2);
47935       else
47936 	e = e - min;
47937       if (e != i)
47938 	in_order = false;
47939       dcopy.perm[i] = e;
47940     }
47941   dcopy.one_operand_p = true;
47942 
47943   if (single_insn_only_p && !in_order)
47944     return false;
47945 
47946   /* For AVX2, test whether we can permute the result in one instruction.  */
47947   if (d->testing_p)
47948     {
47949       if (in_order)
47950 	return true;
47951       dcopy.op1 = dcopy.op0;
47952       return expand_vec_perm_1 (&dcopy);
47953     }
47954 
47955   shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47956   if (GET_MODE_SIZE (d->vmode) == 16)
47957     {
47958       target = gen_reg_rtx (TImode);
47959       emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47960 				      gen_lowpart (TImode, dcopy.op0), shift));
47961     }
47962   else
47963     {
47964       target = gen_reg_rtx (V2TImode);
47965       emit_insn (gen_avx2_palignrv2ti (target,
47966 				       gen_lowpart (V2TImode, dcopy.op1),
47967 				       gen_lowpart (V2TImode, dcopy.op0),
47968 				       shift));
47969     }
47970 
47971   dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47972 
47973   /* Test for the degenerate case where the alignment by itself
47974      produces the desired permutation.  */
47975   if (in_order)
47976     {
47977       emit_move_insn (d->target, dcopy.op0);
47978       return true;
47979     }
47980 
47981   ok = expand_vec_perm_1 (&dcopy);
47982   gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47983 
47984   return ok;
47985 }
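
/* Illustrative sketch (not part of the original source): the 128-bit
   palignr strategy above boils down to a window test on the
   concatenated operands.  If every selected index lies in a span
   narrower than nelt that starts at some MIN > 0, one byte rotation by
   MIN elements funnels everything into a single vector.  Hypothetical
   standalone check, for exposition only.  */
#if 0
#include <stdbool.h>

static bool
palignr_window_ok (const unsigned char *perm, unsigned nelt,
		   unsigned *shift_elems)
{
  unsigned i, min = 2 * nelt, max = 0;
  for (i = 0; i < nelt; ++i)
    {
      if (perm[i] < min) min = perm[i];
      if (perm[i] > max) max = perm[i];
    }
  /* MIN == 0 means no rotation helps; a window of nelt or more
     elements cannot fit into one vector.  */
  if (min == 0 || max - min >= nelt)
    return false;
  *shift_elems = min;
  return true;
}
#endif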
47986 
47987 /* A subroutine of ix86_expand_vec_perm_const_1.  Try to simplify
47988    the permutation using the SSE4_1 pblendv instruction.  Potentially
47989    reduces permutation from 2 pshufb and or to 1 pshufb and pblendv.  */
47990 
47991 static bool
47992 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47993 {
47994   unsigned i, which, nelt = d->nelt;
47995   struct expand_vec_perm_d dcopy, dcopy1;
47996   machine_mode vmode = d->vmode;
47997   bool ok;
47998 
47999   /* Use the same checks as in expand_vec_perm_blend.  */
48000   if (d->one_operand_p)
48001     return false;
48002   if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
48003     ;
48004   else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
48005     ;
48006   else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
48007     ;
48008   else
48009     return false;
48010 
48011   /* Figure out which permutation elements do not stay in their
48012      respective lanes.  */
48013   for (i = 0, which = 0; i < nelt; ++i)
48014     {
48015       unsigned e = d->perm[i];
48016       if (e != i)
48017 	which |= (e < nelt ? 1 : 2);
48018     }
48019   /* We can pblend the part whose elements do not stay in their
48020      respective lanes only when these elements all come from the
48021      same half of the permutation.
48022      {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not in their respective
48023      lanes, but both 8 and 9 are >= 8.
48024      {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not in their
48025      respective lanes, and 8 >= 8 but 2 is not.  */
48026   if (which != 1 && which != 2)
48027     return false;
48028   if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
48029     return true;
48030 
48031   /* First apply a one-operand permutation to the part whose
48032      elements do not stay in their respective lanes.  */
48033   dcopy = *d;
48034   if (which == 2)
48035     dcopy.op0 = dcopy.op1 = d->op1;
48036   else
48037     dcopy.op0 = dcopy.op1 = d->op0;
48038   if (!d->testing_p)
48039     dcopy.target = gen_reg_rtx (vmode);
48040   dcopy.one_operand_p = true;
48041 
48042   for (i = 0; i < nelt; ++i)
48043     dcopy.perm[i] = d->perm[i] & (nelt - 1);
48044 
48045   ok = expand_vec_perm_1 (&dcopy);
48046   if (GET_MODE_SIZE (vmode) != 16 && !ok)
48047     return false;
48048   else
48049     gcc_assert (ok);
48050   if (d->testing_p)
48051     return true;
48052 
48053   /* Next we put permuted elements into their positions.  */
48054   dcopy1 = *d;
48055   if (which == 2)
48056     dcopy1.op1 = dcopy.target;
48057   else
48058     dcopy1.op0 = dcopy.target;
48059 
48060   for (i = 0; i < nelt; ++i)
48061     dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
48062 
48063   ok = expand_vec_perm_blend (&dcopy1);
48064   gcc_assert (ok);
48065 
48066   return true;
48067 }
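
/* Illustrative sketch (not part of the original source): the WHICH
   classification used above, as a standalone helper.  Bit 0 marks a
   misplaced element taken from op0, bit 1 one taken from op1; the
   pshufb+pblendv split applies only when exactly one bit ends up set.
   The helper name is hypothetical.  */
#if 0
static unsigned
classify_misplaced (const unsigned char *perm, unsigned nelt)
{
  unsigned i, which = 0;
  for (i = 0; i < nelt; ++i)
    if (perm[i] != i)
      which |= (perm[i] < nelt ? 1 : 2);
  /* Usable by expand_vec_perm_pblendv iff which == 1 || which == 2.  */
  return which;
}
#endif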
48068 
48069 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
48070 
48071 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
48072    a two vector permutation into a single vector permutation by using
48073    an interleave operation to merge the vectors.  */
48074 
48075 static bool
48076 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
48077 {
48078   struct expand_vec_perm_d dremap, dfinal;
48079   unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
48080   unsigned HOST_WIDE_INT contents;
48081   unsigned char remap[2 * MAX_VECT_LEN];
48082   rtx_insn *seq;
48083   bool ok, same_halves = false;
48084 
48085   if (GET_MODE_SIZE (d->vmode) == 16)
48086     {
48087       if (d->one_operand_p)
48088 	return false;
48089     }
48090   else if (GET_MODE_SIZE (d->vmode) == 32)
48091     {
48092       if (!TARGET_AVX)
48093 	return false;
48094       /* For 32-byte modes, allow this even when d->one_operand_p.
48095 	 The lack of cross-lane shuffling in some instructions
48096 	 might prevent a single insn shuffle.  */
48097       dfinal = *d;
48098       dfinal.testing_p = true;
48099       /* If expand_vec_perm_interleave3 can expand this into
48100 	 a 3 insn sequence, give up and let it be expanded as a
48101 	 3 insn sequence.  While that is one insn longer, it
48102 	 doesn't need a memory operand, and in the common case
48103 	 where the interleave low and interleave high permutations
48104 	 with the same operands are adjacent, the pair needs only
48105 	 4 insns in total after CSE.  */
48106       if (expand_vec_perm_interleave3 (&dfinal))
48107 	return false;
48108     }
48109   else
48110     return false;
48111 
48112   /* Examine from whence the elements come.  */
48113   contents = 0;
48114   for (i = 0; i < nelt; ++i)
48115     contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
48116 
48117   memset (remap, 0xff, sizeof (remap));
48118   dremap = *d;
48119 
48120   if (GET_MODE_SIZE (d->vmode) == 16)
48121     {
48122       unsigned HOST_WIDE_INT h1, h2, h3, h4;
48123 
48124       /* Split the two input vectors into 4 halves.  */
48125       h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
48126       h2 = h1 << nelt2;
48127       h3 = h2 << nelt2;
48128       h4 = h3 << nelt2;
48129 
48130       /* If the elements are all from the low halves, use interleave low;
48131 	 similarly for interleave high.  If the elements are from mis-matched
48132 	 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle.  */
48133       if ((contents & (h1 | h3)) == contents)
48134 	{
48135 	  /* punpckl* */
48136 	  for (i = 0; i < nelt2; ++i)
48137 	    {
48138 	      remap[i] = i * 2;
48139 	      remap[i + nelt] = i * 2 + 1;
48140 	      dremap.perm[i * 2] = i;
48141 	      dremap.perm[i * 2 + 1] = i + nelt;
48142 	    }
48143 	  if (!TARGET_SSE2 && d->vmode == V4SImode)
48144 	    dremap.vmode = V4SFmode;
48145 	}
48146       else if ((contents & (h2 | h4)) == contents)
48147 	{
48148 	  /* punpckh* */
48149 	  for (i = 0; i < nelt2; ++i)
48150 	    {
48151 	      remap[i + nelt2] = i * 2;
48152 	      remap[i + nelt + nelt2] = i * 2 + 1;
48153 	      dremap.perm[i * 2] = i + nelt2;
48154 	      dremap.perm[i * 2 + 1] = i + nelt + nelt2;
48155 	    }
48156 	  if (!TARGET_SSE2 && d->vmode == V4SImode)
48157 	    dremap.vmode = V4SFmode;
48158 	}
48159       else if ((contents & (h1 | h4)) == contents)
48160 	{
48161 	  /* shufps */
48162 	  for (i = 0; i < nelt2; ++i)
48163 	    {
48164 	      remap[i] = i;
48165 	      remap[i + nelt + nelt2] = i + nelt2;
48166 	      dremap.perm[i] = i;
48167 	      dremap.perm[i + nelt2] = i + nelt + nelt2;
48168 	    }
48169 	  if (nelt != 4)
48170 	    {
48171 	      /* shufpd */
48172 	      dremap.vmode = V2DImode;
48173 	      dremap.nelt = 2;
48174 	      dremap.perm[0] = 0;
48175 	      dremap.perm[1] = 3;
48176 	    }
48177 	}
48178       else if ((contents & (h2 | h3)) == contents)
48179 	{
48180 	  /* shufps */
48181 	  for (i = 0; i < nelt2; ++i)
48182 	    {
48183 	      remap[i + nelt2] = i;
48184 	      remap[i + nelt] = i + nelt2;
48185 	      dremap.perm[i] = i + nelt2;
48186 	      dremap.perm[i + nelt2] = i + nelt;
48187 	    }
48188 	  if (nelt != 4)
48189 	    {
48190 	      /* shufpd */
48191 	      dremap.vmode = V2DImode;
48192 	      dremap.nelt = 2;
48193 	      dremap.perm[0] = 1;
48194 	      dremap.perm[1] = 2;
48195 	    }
48196 	}
48197       else
48198 	return false;
48199     }
48200   else
48201     {
48202       unsigned int nelt4 = nelt / 4, nzcnt = 0;
48203       unsigned HOST_WIDE_INT q[8];
48204       unsigned int nonzero_halves[4];
48205 
48206       /* Split the two input vectors into 8 quarters.  */
48207       q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
48208       for (i = 1; i < 8; ++i)
48209 	q[i] = q[0] << (nelt4 * i);
48210       for (i = 0; i < 4; ++i)
48211 	if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
48212 	  {
48213 	    nonzero_halves[nzcnt] = i;
48214 	    ++nzcnt;
48215 	  }
48216 
48217       if (nzcnt == 1)
48218 	{
48219 	  gcc_assert (d->one_operand_p);
48220 	  nonzero_halves[1] = nonzero_halves[0];
48221 	  same_halves = true;
48222 	}
48223       else if (d->one_operand_p)
48224 	{
48225 	  gcc_assert (nonzero_halves[0] == 0);
48226 	  gcc_assert (nonzero_halves[1] == 1);
48227 	}
48228 
48229       if (nzcnt <= 2)
48230 	{
48231 	  if (d->perm[0] / nelt2 == nonzero_halves[1])
48232 	    {
48233 	      /* Attempt to increase the likelihood that dfinal
48234 		 shuffle will be intra-lane.  */
48235 	      char tmph = nonzero_halves[0];
48236 	      nonzero_halves[0] = nonzero_halves[1];
48237 	      nonzero_halves[1] = tmph;
48238 	    }
48239 
48240 	  /* vperm2f128 or vperm2i128.  */
48241 	  for (i = 0; i < nelt2; ++i)
48242 	    {
48243 	      remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
48244 	      remap[i + nonzero_halves[0] * nelt2] = i;
48245 	      dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
48246 	      dremap.perm[i] = i + nonzero_halves[0] * nelt2;
48247 	    }
48248 
48249 	  if (d->vmode != V8SFmode
48250 	      && d->vmode != V4DFmode
48251 	      && d->vmode != V8SImode)
48252 	    {
48253 	      dremap.vmode = V8SImode;
48254 	      dremap.nelt = 8;
48255 	      for (i = 0; i < 4; ++i)
48256 		{
48257 		  dremap.perm[i] = i + nonzero_halves[0] * 4;
48258 		  dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
48259 		}
48260 	    }
48261 	}
48262       else if (d->one_operand_p)
48263 	return false;
48264       else if (TARGET_AVX2
48265 	       && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
48266 	{
48267 	  /* vpunpckl* */
48268 	  for (i = 0; i < nelt4; ++i)
48269 	    {
48270 	      remap[i] = i * 2;
48271 	      remap[i + nelt] = i * 2 + 1;
48272 	      remap[i + nelt2] = i * 2 + nelt2;
48273 	      remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
48274 	      dremap.perm[i * 2] = i;
48275 	      dremap.perm[i * 2 + 1] = i + nelt;
48276 	      dremap.perm[i * 2 + nelt2] = i + nelt2;
48277 	      dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
48278 	    }
48279 	}
48280       else if (TARGET_AVX2
48281 	       && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
48282 	{
48283 	  /* vpunpckh* */
48284 	  for (i = 0; i < nelt4; ++i)
48285 	    {
48286 	      remap[i + nelt4] = i * 2;
48287 	      remap[i + nelt + nelt4] = i * 2 + 1;
48288 	      remap[i + nelt2 + nelt4] = i * 2 + nelt2;
48289 	      remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
48290 	      dremap.perm[i * 2] = i + nelt4;
48291 	      dremap.perm[i * 2 + 1] = i + nelt + nelt4;
48292 	      dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
48293 	      dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
48294 	    }
48295 	}
48296       else
48297 	return false;
48298     }
48299 
48300   /* Use the remapping array set up above to move the elements from their
48301      swizzled locations into their final destinations.  */
48302   dfinal = *d;
48303   for (i = 0; i < nelt; ++i)
48304     {
48305       unsigned e = remap[d->perm[i]];
48306       gcc_assert (e < nelt);
48307       /* If same_halves is true, both halves of the remapped vector are the
48308 	 same.  Avoid cross-lane accesses if possible.  */
48309       if (same_halves && i >= nelt2)
48310 	{
48311 	  gcc_assert (e < nelt2);
48312 	  dfinal.perm[i] = e + nelt2;
48313 	}
48314       else
48315 	dfinal.perm[i] = e;
48316     }
48317   if (!d->testing_p)
48318     {
48319       dremap.target = gen_reg_rtx (dremap.vmode);
48320       dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48321     }
48322   dfinal.op1 = dfinal.op0;
48323   dfinal.one_operand_p = true;
48324 
48325   /* Test if the final remap can be done with a single insn.  For V4SFmode or
48326      V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
48327   start_sequence ();
48328   ok = expand_vec_perm_1 (&dfinal);
48329   seq = get_insns ();
48330   end_sequence ();
48331 
48332   if (!ok)
48333     return false;
48334 
48335   if (d->testing_p)
48336     return true;
48337 
48338   if (dremap.vmode != dfinal.vmode)
48339     {
48340       dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
48341       dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
48342     }
48343 
48344   ok = expand_vec_perm_1 (&dremap);
48345   gcc_assert (ok);
48346 
48347   emit_insn (seq);
48348   return true;
48349 }
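
/* Illustrative sketch (not part of the original source): the CONTENTS
   bitmask logic from expand_vec_perm_interleave2 for 16-byte modes,
   reduced to a classifier.  Each of h1..h4 covers one half of one
   operand in the concatenated index space.  The helper and its return
   encoding are hypothetical.  */
#if 0
static int
classify_halves (const unsigned char *perm, unsigned nelt)
{
  unsigned long long contents = 0, h1, h2, h3, h4;
  unsigned i, nelt2 = nelt / 2;
  for (i = 0; i < nelt; ++i)
    contents |= 1ull << perm[i];
  h1 = (1ull << nelt2) - 1;	/* low half of op0 */
  h2 = h1 << nelt2;		/* high half of op0 */
  h3 = h2 << nelt2;		/* low half of op1 */
  h4 = h3 << nelt2;		/* high half of op1 */
  if ((contents & (h1 | h3)) == contents) return 0;  /* punpckl* */
  if ((contents & (h2 | h4)) == contents) return 1;  /* punpckh* */
  if ((contents & (h1 | h4)) == contents) return 2;  /* shufps/shufpd */
  if ((contents & (h2 | h3)) == contents) return 3;  /* shufps/shufpd */
  return -1;			/* no interleave-based remap */
}
#endif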
48350 
48351 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
48352    a single vector cross-lane permutation into vpermq followed
48353    by any of the single insn permutations.  */
48354 
48355 static bool
48356 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
48357 {
48358   struct expand_vec_perm_d dremap, dfinal;
48359   unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
48360   unsigned contents[2];
48361   bool ok;
48362 
48363   if (!(TARGET_AVX2
48364 	&& (d->vmode == V32QImode || d->vmode == V16HImode)
48365 	&& d->one_operand_p))
48366     return false;
48367 
48368   contents[0] = 0;
48369   contents[1] = 0;
48370   for (i = 0; i < nelt2; ++i)
48371     {
48372       contents[0] |= 1u << (d->perm[i] / nelt4);
48373       contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
48374     }
48375 
48376   for (i = 0; i < 2; ++i)
48377     {
48378       unsigned int cnt = 0;
48379       for (j = 0; j < 4; ++j)
48380 	if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
48381 	  return false;
48382     }
48383 
48384   if (d->testing_p)
48385     return true;
48386 
48387   dremap = *d;
48388   dremap.vmode = V4DImode;
48389   dremap.nelt = 4;
48390   dremap.target = gen_reg_rtx (V4DImode);
48391   dremap.op0 = gen_lowpart (V4DImode, d->op0);
48392   dremap.op1 = dremap.op0;
48393   dremap.one_operand_p = true;
48394   for (i = 0; i < 2; ++i)
48395     {
48396       unsigned int cnt = 0;
48397       for (j = 0; j < 4; ++j)
48398 	if ((contents[i] & (1u << j)) != 0)
48399 	  dremap.perm[2 * i + cnt++] = j;
48400       for (; cnt < 2; ++cnt)
48401 	dremap.perm[2 * i + cnt] = 0;
48402     }
48403 
48404   dfinal = *d;
48405   dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48406   dfinal.op1 = dfinal.op0;
48407   dfinal.one_operand_p = true;
48408   for (i = 0, j = 0; i < nelt; ++i)
48409     {
48410       if (i == nelt2)
48411 	j = 2;
48412       dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
48413       if ((d->perm[i] / nelt4) == dremap.perm[j])
48414 	;
48415       else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
48416 	dfinal.perm[i] |= nelt4;
48417       else
48418 	gcc_unreachable ();
48419     }
48420 
48421   ok = expand_vec_perm_1 (&dremap);
48422   gcc_assert (ok);
48423 
48424   ok = expand_vec_perm_1 (&dfinal);
48425   gcc_assert (ok);
48426 
48427   return true;
48428 }
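
/* Illustrative sketch (not part of the original source): the
   feasibility test above, restated.  Each half of the result may draw
   from at most two of the four 64-bit quarters of the source vector,
   otherwise no single vpermq can feed the in-lane shuffle that
   follows.  Hypothetical helper, assuming a canonicalized
   one-operand permutation (all indices < nelt).  */
#if 0
#include <stdbool.h>

static bool
vpermq_quarters_fit (const unsigned char *perm, unsigned nelt)
{
  unsigned nelt2 = nelt / 2, nelt4 = nelt / 4, half;
  for (half = 0; half < 2; ++half)
    {
      unsigned mask = 0, cnt = 0, i, j;
      for (i = 0; i < nelt2; ++i)
	mask |= 1u << (perm[half * nelt2 + i] / nelt4);
      for (j = 0; j < 4; ++j)
	cnt += (mask >> j) & 1;
      if (cnt > 2)
	return false;
    }
  return true;
}
#endif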
48429 
48430 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to expand
48431    a vector permutation using two instructions: vperm2f128 or
48432    vperm2i128, followed by any single in-lane permutation.  */
48433 
48434 static bool
48435 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
48436 {
48437   struct expand_vec_perm_d dfirst, dsecond;
48438   unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
48439   bool ok;
48440 
48441   if (!TARGET_AVX
48442       || GET_MODE_SIZE (d->vmode) != 32
48443       || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
48444     return false;
48445 
48446   dsecond = *d;
48447   dsecond.one_operand_p = false;
48448   dsecond.testing_p = true;
48449 
48450   /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
48451      immediate.  For perm < 16 the second permutation uses
48452      d->op0 as first operand, for perm >= 16 it uses d->op1
48453      as first operand.  The second operand is the result of
48454      vperm2[fi]128.  */
48455   for (perm = 0; perm < 32; perm++)
48456     {
48457       /* Ignore permutations which do not move anything cross-lane.  */
48458       if (perm < 16)
48459 	{
48460 	  /* The second shuffle for e.g. V4DFmode has
48461 	     0123 and ABCD operands.
48462 	     Ignore AB23, as 23 is already in the second lane
48463 	     of the first operand.  */
48464 	  if ((perm & 0xc) == (1 << 2)) continue;
48465 	  /* And 01CD, as 01 is in the first lane of the first
48466 	     operand.  */
48467 	  if ((perm & 3) == 0) continue;
48468 	  /* And 4567, as then the vperm2[fi]128 doesn't change
48469 	     anything on the original 4567 second operand.  */
48470 	  if ((perm & 0xf) == ((3 << 2) | 2)) continue;
48471 	}
48472       else
48473 	{
48474 	  /* The second shuffle for e.g. V4DFmode has
48475 	     4567 and ABCD operands.
48476 	     Ignore AB67, as 67 is already in the second lane
48477 	     of the first operand.  */
48478 	  if ((perm & 0xc) == (3 << 2)) continue;
48479 	  /* And 45CD, as 45 is in the first lane of the first
48480 	     operand.  */
48481 	  if ((perm & 3) == 2) continue;
48482 	  /* And 0123, as then the vperm2[fi]128 doesn't change
48483 	     anything on the original 0123 first operand.  */
48484 	  if ((perm & 0xf) == (1 << 2)) continue;
48485 	}
48486 
48487       for (i = 0; i < nelt; i++)
48488 	{
48489 	  j = d->perm[i] / nelt2;
48490 	  if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48491 	    dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48492 	  else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48493 	    dsecond.perm[i] = d->perm[i] & (nelt - 1);
48494 	  else
48495 	    break;
48496 	}
48497 
48498       if (i == nelt)
48499 	{
48500 	  start_sequence ();
48501 	  ok = expand_vec_perm_1 (&dsecond);
48502 	  end_sequence ();
48503 	}
48504       else
48505 	ok = false;
48506 
48507       if (ok)
48508 	{
48509 	  if (d->testing_p)
48510 	    return true;
48511 
48512 	  /* Found a usable second shuffle.  dfirst will be
48513 	     vperm2f128 on d->op0 and d->op1.  */
48514 	  dsecond.testing_p = false;
48515 	  dfirst = *d;
48516 	  dfirst.target = gen_reg_rtx (d->vmode);
48517 	  for (i = 0; i < nelt; i++)
48518 	    dfirst.perm[i] = (i & (nelt2 - 1))
48519 			     + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48520 
48521 	  canonicalize_perm (&dfirst);
48522 	  ok = expand_vec_perm_1 (&dfirst);
48523 	  gcc_assert (ok);
48524 
48525 	  /* And dsecond is some single insn shuffle, taking
48526 	     d->op0 and result of vperm2f128 (if perm < 16) or
48527 	     d->op1 and result of vperm2f128 (otherwise).  */
48528 	  if (perm >= 16)
48529 	    dsecond.op0 = dsecond.op1;
48530 	  dsecond.op1 = dfirst.target;
48531 
48532 	  ok = expand_vec_perm_1 (&dsecond);
48533 	  gcc_assert (ok);
48534 
48535 	  return true;
48536 	}
48537 
48538       /* For one operand, the only useful vperm2f128 permutation is 0x01
48539 	 i.e. swapping the 128-bit lanes.  */
48540       if (d->one_operand_p)
48541 	return false;
48542     }
48543 
48544   return false;
48545 }
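
/* Illustrative sketch (not part of the original source): the candidate
   loop above enumerates PERM as two 2-bit lane selectors; the actual
   vperm2[fi]128 immediate places them in bits 0-1 and 4-5, which is
   what the ((perm << 2) | perm) & 0x33 folding produces.  Hypothetical
   decoder, shown only to make the encoding concrete.  */
#if 0
static void
decode_vperm2f128_imm (unsigned imm8, unsigned *lo_src, unsigned *hi_src)
{
  /* Sources index the four 128-bit lanes of op0:op1 concatenated:
     0 = op0 low, 1 = op0 high, 2 = op1 low, 3 = op1 high.  */
  *lo_src = imm8 & 3;
  *hi_src = (imm8 >> 4) & 3;
}
#endif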
48546 
48547 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
48548    a two vector permutation using 2 intra-lane interleave insns
48549    and cross-lane shuffle for 32-byte vectors.  */
48550 
48551 static bool
48552 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48553 {
48554   unsigned i, nelt;
48555   rtx (*gen) (rtx, rtx, rtx);
48556 
48557   if (d->one_operand_p)
48558     return false;
48559   if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48560     ;
48561   else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
48562     ;
48563   else
48564     return false;
48565 
48566   nelt = d->nelt;
48567   if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48568     return false;
48569   for (i = 0; i < nelt; i += 2)
48570     if (d->perm[i] != d->perm[0] + i / 2
48571 	|| d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
48572       return false;
48573 
48574   if (d->testing_p)
48575     return true;
48576 
48577   switch (d->vmode)
48578     {
48579     case V32QImode:
48580       if (d->perm[0])
48581 	gen = gen_vec_interleave_highv32qi;
48582       else
48583 	gen = gen_vec_interleave_lowv32qi;
48584       break;
48585     case V16HImode:
48586       if (d->perm[0])
48587 	gen = gen_vec_interleave_highv16hi;
48588       else
48589 	gen = gen_vec_interleave_lowv16hi;
48590       break;
48591     case V8SImode:
48592       if (d->perm[0])
48593 	gen = gen_vec_interleave_highv8si;
48594       else
48595 	gen = gen_vec_interleave_lowv8si;
48596       break;
48597     case V4DImode:
48598       if (d->perm[0])
48599 	gen = gen_vec_interleave_highv4di;
48600       else
48601 	gen = gen_vec_interleave_lowv4di;
48602       break;
48603     case V8SFmode:
48604       if (d->perm[0])
48605 	gen = gen_vec_interleave_highv8sf;
48606       else
48607 	gen = gen_vec_interleave_lowv8sf;
48608       break;
48609     case V4DFmode:
48610       if (d->perm[0])
48611 	gen = gen_vec_interleave_highv4df;
48612       else
48613 	gen = gen_vec_interleave_lowv4df;
48614       break;
48615     default:
48616       gcc_unreachable ();
48617     }
48618 
48619   emit_insn (gen (d->target, d->op0, d->op1));
48620   return true;
48621 }
48622 
48623 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement
48624    a single vector permutation using a single intra-lane vector
48625    permutation, vperm2f128 swapping the lanes and vblend* insn blending
48626    the non-swapped and swapped vectors together.  */
48627 
48628 static bool
48629 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48630 {
48631   struct expand_vec_perm_d dfirst, dsecond;
48632   unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48633   rtx_insn *seq;
48634   bool ok;
48635   rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48636 
48637   if (!TARGET_AVX
48638       || TARGET_AVX2
48639       || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48640       || !d->one_operand_p)
48641     return false;
48642 
48643   dfirst = *d;
48644   for (i = 0; i < nelt; i++)
48645     dfirst.perm[i] = 0xff;
48646   for (i = 0, msk = 0; i < nelt; i++)
48647     {
48648       j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48649       if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48650 	return false;
48651       dfirst.perm[j] = d->perm[i];
48652       if (j != i)
48653 	msk |= (1 << i);
48654     }
48655   for (i = 0; i < nelt; i++)
48656     if (dfirst.perm[i] == 0xff)
48657       dfirst.perm[i] = i;
48658 
48659   if (!d->testing_p)
48660     dfirst.target = gen_reg_rtx (dfirst.vmode);
48661 
48662   start_sequence ();
48663   ok = expand_vec_perm_1 (&dfirst);
48664   seq = get_insns ();
48665   end_sequence ();
48666 
48667   if (!ok)
48668     return false;
48669 
48670   if (d->testing_p)
48671     return true;
48672 
48673   emit_insn (seq);
48674 
48675   dsecond = *d;
48676   dsecond.op0 = dfirst.target;
48677   dsecond.op1 = dfirst.target;
48678   dsecond.one_operand_p = true;
48679   dsecond.target = gen_reg_rtx (dsecond.vmode);
48680   for (i = 0; i < nelt; i++)
48681     dsecond.perm[i] = i ^ nelt2;
48682 
48683   ok = expand_vec_perm_1 (&dsecond);
48684   gcc_assert (ok);
48685 
48686   blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48687   emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48688   return true;
48689 }
48690 
48691 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement a V4DF
48692    permutation using two vperm2f128, followed by a vshufpd insn blending
48693    the two vectors together.  */
48694 
48695 static bool
48696 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48697 {
48698   struct expand_vec_perm_d dfirst, dsecond, dthird;
48699   bool ok;
48700 
48701   if (!TARGET_AVX || (d->vmode != V4DFmode))
48702     return false;
48703 
48704   if (d->testing_p)
48705     return true;
48706 
48707   dfirst = *d;
48708   dsecond = *d;
48709   dthird = *d;
48710 
48711   dfirst.perm[0] = (d->perm[0] & ~1);
48712   dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48713   dfirst.perm[2] = (d->perm[2] & ~1);
48714   dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48715   dsecond.perm[0] = (d->perm[1] & ~1);
48716   dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48717   dsecond.perm[2] = (d->perm[3] & ~1);
48718   dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48719   dthird.perm[0] = (d->perm[0] % 2);
48720   dthird.perm[1] = (d->perm[1] % 2) + 4;
48721   dthird.perm[2] = (d->perm[2] % 2) + 2;
48722   dthird.perm[3] = (d->perm[3] % 2) + 6;
48723 
48724   dfirst.target = gen_reg_rtx (dfirst.vmode);
48725   dsecond.target = gen_reg_rtx (dsecond.vmode);
48726   dthird.op0 = dfirst.target;
48727   dthird.op1 = dsecond.target;
48728   dthird.one_operand_p = false;
48729 
48730   canonicalize_perm (&dfirst);
48731   canonicalize_perm (&dsecond);
48732 
48733   ok = expand_vec_perm_1 (&dfirst)
48734        && expand_vec_perm_1 (&dsecond)
48735        && expand_vec_perm_1 (&dthird);
48736 
48737   gcc_assert (ok);
48738 
48739   return true;
48740 }
48741 
48742 /* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
48743    permutation with two pshufb insns and an ior.  We should have already
48744    failed all two instruction sequences.  */
48745 
48746 static bool
48747 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48748 {
48749   rtx rperm[2][16], vperm, l, h, op, m128;
48750   unsigned int i, nelt, eltsz;
48751 
48752   if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48753     return false;
48754   gcc_assert (!d->one_operand_p);
48755 
48756   if (d->testing_p)
48757     return true;
48758 
48759   nelt = d->nelt;
48760   eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48761 
48762   /* Generate two permutation masks.  If the required element is within
48763      the given vector it is shuffled into the proper lane.  If the required
48764      element is in the other vector, force a zero into the lane by setting
48765      bit 7 in the permutation mask.  */
48766   m128 = GEN_INT (-128);
48767   for (i = 0; i < nelt; ++i)
48768     {
48769       unsigned j, e = d->perm[i];
48770       unsigned which = (e >= nelt);
48771       if (e >= nelt)
48772 	e -= nelt;
48773 
48774       for (j = 0; j < eltsz; ++j)
48775 	{
48776 	  rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48777 	  rperm[1-which][i*eltsz + j] = m128;
48778 	}
48779     }
48780 
48781   vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48782   vperm = force_reg (V16QImode, vperm);
48783 
48784   l = gen_reg_rtx (V16QImode);
48785   op = gen_lowpart (V16QImode, d->op0);
48786   emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48787 
48788   vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48789   vperm = force_reg (V16QImode, vperm);
48790 
48791   h = gen_reg_rtx (V16QImode);
48792   op = gen_lowpart (V16QImode, d->op1);
48793   emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48794 
48795   op = d->target;
48796   if (d->vmode != V16QImode)
48797     op = gen_reg_rtx (V16QImode);
48798   emit_insn (gen_iorv16qi3 (op, l, h));
48799   if (op != d->target)
48800     emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48801 
48802   return true;
48803 }
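
/* Illustrative sketch (not part of the original source): building the
   two pshufb control vectors used above.  A control byte with bit 7
   set (-128) zeroes its lane, so each output byte is live in exactly
   one mask and the shuffled results can be OR-ed.  Hypothetical
   helper; eltsz and nelt are as in the surrounding code, with
   nelt * eltsz == 16.  */
#if 0
static void
build_pshufb2_masks (const unsigned char *perm, unsigned nelt,
		     unsigned eltsz, signed char mask0[16],
		     signed char mask1[16])
{
  unsigned i, j;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = perm[i], which = (e >= nelt);
      if (which)
	e -= nelt;
      for (j = 0; j < eltsz; ++j)
	{
	  signed char *live = which ? mask1 : mask0;
	  signed char *dead = which ? mask0 : mask1;
	  live[i * eltsz + j] = (signed char) (e * eltsz + j);
	  dead[i * eltsz + j] = -128;	/* force zero in this lane */
	}
    }
}
#endif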
48804 
48805 /* Implement an arbitrary permutation of one V32QImode or V16HImode
48806    operand with two vpshufb insns, a vpermq and a vpor.  We should have
48807    already failed all two or three instruction sequences.  */
48808 
48809 static bool
48810 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48811 {
48812   rtx rperm[2][32], vperm, l, h, hp, op, m128;
48813   unsigned int i, nelt, eltsz;
48814 
48815   if (!TARGET_AVX2
48816       || !d->one_operand_p
48817       || (d->vmode != V32QImode && d->vmode != V16HImode))
48818     return false;
48819 
48820   if (d->testing_p)
48821     return true;
48822 
48823   nelt = d->nelt;
48824   eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48825 
48826   /* Generate two permutation masks.  If the required element is within
48827      the same lane, it is shuffled in.  If the required element is from
48828      the other lane, force a zero by setting bit 7 in the permutation
48829      mask.  The other mask has a non-negative element wherever an
48830      element is requested from the other lane; that element is also
48831      moved to the other lane, so that the result of vpshufb can have
48832      its two V2TImode halves swapped.  */
48833   m128 = GEN_INT (-128);
48834   for (i = 0; i < nelt; ++i)
48835     {
48836       unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48837       unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48838 
48839       for (j = 0; j < eltsz; ++j)
48840 	{
48841 	  rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48842 	  rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48843 	}
48844     }
48845 
48846   vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48847   vperm = force_reg (V32QImode, vperm);
48848 
48849   h = gen_reg_rtx (V32QImode);
48850   op = gen_lowpart (V32QImode, d->op0);
48851   emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48852 
48853   /* Swap the 128-bit lanes of h into hp.  */
48854   hp = gen_reg_rtx (V4DImode);
48855   op = gen_lowpart (V4DImode, h);
48856   emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48857 				  const1_rtx));
48858 
48859   vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48860   vperm = force_reg (V32QImode, vperm);
48861 
48862   l = gen_reg_rtx (V32QImode);
48863   op = gen_lowpart (V32QImode, d->op0);
48864   emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48865 
48866   op = d->target;
48867   if (d->vmode != V32QImode)
48868     op = gen_reg_rtx (V32QImode);
48869   emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48870   if (op != d->target)
48871     emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48872 
48873   return true;
48874 }
48875 
48876 /* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
48877    and extract-odd permutations of two V32QImode or V16HImode operands
48878    with two vpshufb insns, vpor and vpermq.  We should have already
48879    failed all two or three instruction sequences.  */
48880 
48881 static bool
48882 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48883 {
48884   rtx rperm[2][32], vperm, l, h, ior, op, m128;
48885   unsigned int i, nelt, eltsz;
48886 
48887   if (!TARGET_AVX2
48888       || d->one_operand_p
48889       || (d->vmode != V32QImode && d->vmode != V16HImode))
48890     return false;
48891 
48892   for (i = 0; i < d->nelt; ++i)
48893     if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48894       return false;
48895 
48896   if (d->testing_p)
48897     return true;
48898 
48899   nelt = d->nelt;
48900   eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48901 
48902   /* Generate two permutation masks.  In the first permutation mask
48903      the first quarter will contain indexes for the first half
48904      of op0, the second quarter will have bit 7 set, the third
48905      quarter will contain indexes for the second half of op0, and
48906      the last quarter will have bit 7 set.  In the second permutation
48907      mask the first quarter will have bit 7 set, the second quarter
48908      indexes for the first half of op1, the third quarter bit 7 set,
48909      and the last quarter indexes for the second half of op1.
48910      I.e. the first mask e.g. for V32QImode extract even will be:
48911      0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48912      (all values masked with 0xf except for -128) and second mask
48913      for extract even will be
48914      -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe.  */
48915   m128 = GEN_INT (-128);
48916   for (i = 0; i < nelt; ++i)
48917     {
48918       unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48919       unsigned which = d->perm[i] >= nelt;
48920       unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48921 
48922       for (j = 0; j < eltsz; ++j)
48923 	{
48924 	  rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48925 	  rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48926 	}
48927     }
48928 
48929   vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48930   vperm = force_reg (V32QImode, vperm);
48931 
48932   l = gen_reg_rtx (V32QImode);
48933   op = gen_lowpart (V32QImode, d->op0);
48934   emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48935 
48936   vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48937   vperm = force_reg (V32QImode, vperm);
48938 
48939   h = gen_reg_rtx (V32QImode);
48940   op = gen_lowpart (V32QImode, d->op1);
48941   emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48942 
48943   ior = gen_reg_rtx (V32QImode);
48944   emit_insn (gen_iorv32qi3 (ior, l, h));
48945 
48946   /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation.  */
48947   op = gen_reg_rtx (V4DImode);
48948   ior = gen_lowpart (V4DImode, ior);
48949   emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48950 				  const1_rtx, GEN_INT (3)));
48951   emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48952 
48953   return true;
48954 }
48955 
48956 /* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
48957    and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48958    with two "and" and "pack" or two "shift" and "pack" insns.  We should
48959    have already failed all two instruction sequences.  */
48960 
48961 static bool
48962 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48963 {
48964   rtx op, dop0, dop1, t, rperm[16];
48965   unsigned i, odd, c, s, nelt = d->nelt;
48966   bool end_perm = false;
48967   machine_mode half_mode;
48968   rtx (*gen_and) (rtx, rtx, rtx);
48969   rtx (*gen_pack) (rtx, rtx, rtx);
48970   rtx (*gen_shift) (rtx, rtx, rtx);
48971 
48972   if (d->one_operand_p)
48973     return false;
48974 
48975   switch (d->vmode)
48976     {
48977     case V8HImode:
48978       /* Required for "pack".  */
48979       if (!TARGET_SSE4_1)
48980         return false;
48981       c = 0xffff;
48982       s = 16;
48983       half_mode = V4SImode;
48984       gen_and = gen_andv4si3;
48985       gen_pack = gen_sse4_1_packusdw;
48986       gen_shift = gen_lshrv4si3;
48987       break;
48988     case V16QImode:
48989       /* No check as all instructions are SSE2.  */
48990       c = 0xff;
48991       s = 8;
48992       half_mode = V8HImode;
48993       gen_and = gen_andv8hi3;
48994       gen_pack = gen_sse2_packuswb;
48995       gen_shift = gen_lshrv8hi3;
48996       break;
48997     case V16HImode:
48998       if (!TARGET_AVX2)
48999         return false;
49000       c = 0xffff;
49001       s = 16;
49002       half_mode = V8SImode;
49003       gen_and = gen_andv8si3;
49004       gen_pack = gen_avx2_packusdw;
49005       gen_shift = gen_lshrv8si3;
49006       end_perm = true;
49007       break;
49008     case V32QImode:
49009       if (!TARGET_AVX2)
49010         return false;
49011       c = 0xff;
49012       s = 8;
49013       half_mode = V16HImode;
49014       gen_and = gen_andv16hi3;
49015       gen_pack = gen_avx2_packuswb;
49016       gen_shift = gen_lshrv16hi3;
49017       end_perm = true;
49018       break;
49019     default:
49020       /* Only for V8HI, V16QI, V16HI and V32QI modes is this approach
49021 	 more profitable than general shuffles.  */
49022       return false;
49023     }
49024 
49025   /* Check that permutation is even or odd.  */
49026   odd = d->perm[0];
49027   if (odd > 1)
49028     return false;
49029 
49030   for (i = 1; i < nelt; ++i)
49031     if (d->perm[i] != 2 * i + odd)
49032       return false;
49033 
49034   if (d->testing_p)
49035     return true;
49036 
49037   dop0 = gen_reg_rtx (half_mode);
49038   dop1 = gen_reg_rtx (half_mode);
49039   if (odd == 0)
49040     {
49041       for (i = 0; i < nelt / 2; i++)
49042 	rperm[i] = GEN_INT (c);
49043       t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
49044       t = force_reg (half_mode, t);
49045       emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
49046       emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
49047     }
49048   else
49049     {
49050       emit_insn (gen_shift (dop0,
49051 			    gen_lowpart (half_mode, d->op0),
49052 			    GEN_INT (s)));
49053       emit_insn (gen_shift (dop1,
49054 			    gen_lowpart (half_mode, d->op1),
49055 			    GEN_INT (s)));
49056     }
49057   /* For the AVX2 256-bit case we need to permute the pack result.  */
49058   if (TARGET_AVX2 && end_perm)
49059     {
49060       op = gen_reg_rtx (d->vmode);
49061       t = gen_reg_rtx (V4DImode);
49062       emit_insn (gen_pack (op, dop0, dop1));
49063       emit_insn (gen_avx2_permv4di_1 (t,
49064 				      gen_lowpart (V4DImode, op),
49065 				      const0_rtx,
49066 				      const2_rtx,
49067 				      const1_rtx,
49068 				      GEN_INT (3)));
49069       emit_move_insn (d->target, gen_lowpart (d->vmode, t));
49070     }
49071   else
49072     emit_insn (gen_pack (d->target, dop0, dop1));
49073 
49074   return true;
49075 }
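
/* Illustrative sketch (not part of the original source): a scalar
   model of the V16QImode path above.  "Even" masks each word to its
   low byte, "odd" shifts the high byte down; packuswb then narrows
   both operands, and since every value is already <= 0xff its
   saturation is the identity.  Names are hypothetical.  */
#if 0
#include <stdint.h>

static void
even_odd_pack_v16qi (const uint16_t op0[8], const uint16_t op1[8],
		     uint8_t out[16], int odd)
{
  unsigned i;
  for (i = 0; i < 8; ++i)
    {
      uint16_t a = odd ? (uint16_t) (op0[i] >> 8) : (uint16_t) (op0[i] & 0xff);
      uint16_t b = odd ? (uint16_t) (op1[i] >> 8) : (uint16_t) (op1[i] & 0xff);
      out[i] = (uint8_t) a;	/* low half of the pack result */
      out[i + 8] = (uint8_t) b;	/* high half of the pack result */
    }
}
#endif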
49076 
49077 /* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
49078    and extract-odd permutations of two V64QI operands with two
49079    "shift", two "trunc" and one "concat" insns for "odd", and with
49080    two "trunc" and one "concat" insn for "even".  We should have
49081    already failed all two instruction sequences.  */
49082 
49083 static bool
49084 expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d)
49085 {
49086   rtx t1, t2, t3, t4;
49087   unsigned i, odd, nelt = d->nelt;
49088 
49089   if (!TARGET_AVX512BW
49090       || d->one_operand_p
49091       || d->vmode != V64QImode)
49092     return false;
49093 
49094   /* Check that permutation is even or odd.  */
49095   odd = d->perm[0];
49096   if (odd > 1)
49097     return false;
49098 
49099   for (i = 1; i < nelt; ++i)
49100     if (d->perm[i] != 2 * i + odd)
49101       return false;
49102 
49103   if (d->testing_p)
49104     return true;
49105 
49106 
49107   if (odd)
49108     {
49109       t1 = gen_reg_rtx (V32HImode);
49110       t2 = gen_reg_rtx (V32HImode);
49111       emit_insn (gen_lshrv32hi3 (t1,
49112 				 gen_lowpart (V32HImode, d->op0),
49113 				 GEN_INT (8)));
49114       emit_insn (gen_lshrv32hi3 (t2,
49115 				 gen_lowpart (V32HImode, d->op1),
49116 				 GEN_INT (8)));
49117     }
49118   else
49119     {
49120       t1 = gen_lowpart (V32HImode, d->op0);
49121       t2 = gen_lowpart (V32HImode, d->op1);
49122     }
49123 
49124   t3 = gen_reg_rtx (V32QImode);
49125   t4 = gen_reg_rtx (V32QImode);
49126   emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1));
49127   emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2));
49128   emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4));
49129 
49130   return true;
49131 }
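
/* Illustrative sketch (not part of the original source): the V64QImode
   shift+trunc+concat sequence above, modeled on one 16-bit lane.  The
   odd byte of each word is exposed by a logical shift right of 8, and
   the vpmovwb-style truncation keeps the low byte of every word.  */
#if 0
#include <stdint.h>

static uint8_t
extract_byte_of_word (uint16_t word, int odd)
{
  /* odd: shift the high byte down first; even: truncate directly.  */
  return (uint8_t) (odd ? (word >> 8) : word);
}
#endif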
49132 
49133 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
49134    and extract-odd permutations.  */
49135 
49136 static bool
49137 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
49138 {
49139   rtx t1, t2, t3, t4, t5;
49140 
49141   switch (d->vmode)
49142     {
49143     case V4DFmode:
49144       if (d->testing_p)
49145 	break;
49146       t1 = gen_reg_rtx (V4DFmode);
49147       t2 = gen_reg_rtx (V4DFmode);
49148 
49149       /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
49150       emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
49151       emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
49152 
49153       /* Now an unpck[lh]pd will produce the result required.  */
49154       if (odd)
49155 	t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
49156       else
49157 	t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
49158       emit_insn (t3);
49159       break;
49160 
49161     case V8SFmode:
49162       {
49163 	int mask = odd ? 0xdd : 0x88;
49164 
49165 	if (d->testing_p)
49166 	  break;
49167 	t1 = gen_reg_rtx (V8SFmode);
49168 	t2 = gen_reg_rtx (V8SFmode);
49169 	t3 = gen_reg_rtx (V8SFmode);
49170 
49171 	/* Shuffle within the 128-bit lanes to produce:
49172 	   { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }.  */
49173 	emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
49174 				      GEN_INT (mask)));
49175 
49176 	/* Shuffle the lanes around to produce:
49177 	   { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }.  */
49178 	emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
49179 					    GEN_INT (0x3)));
49180 
49181 	/* Shuffle within the 128-bit lanes to produce:
49182 	   { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }.  */
49183 	emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
49184 
49185 	/* Shuffle within the 128-bit lanes to produce:
49186 	   { 8 a c e c e 8 a } | { 9 b d f d f 9 b }.  */
49187 	emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
49188 
49189 	/* Shuffle the lanes around to produce:
49190 	   { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }.  */
49191 	emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
49192 					    GEN_INT (0x20)));
49193       }
49194       break;
49195 
49196     case V2DFmode:
49197     case V4SFmode:
49198     case V2DImode:
49199     case V4SImode:
49200       /* These are always directly implementable by expand_vec_perm_1.  */
49201       gcc_unreachable ();
49202 
49203     case V8HImode:
49204       if (TARGET_SSE4_1)
49205 	return expand_vec_perm_even_odd_pack (d);
49206       else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
49207 	return expand_vec_perm_pshufb2 (d);
49208       else
49209 	{
49210 	  if (d->testing_p)
49211 	    break;
49212 	  /* We need 2*log2(N)-1 operations to achieve odd/even
49213 	     with interleave.  */
49214 	  t1 = gen_reg_rtx (V8HImode);
49215 	  t2 = gen_reg_rtx (V8HImode);
49216 	  emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
49217 	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
49218 	  emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
49219 	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
49220 	  if (odd)
49221 	    t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
49222 	  else
49223 	    t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
49224 	  emit_insn (t3);
49225 	}
49226       break;
49227 
49228     case V16QImode:
49229       return expand_vec_perm_even_odd_pack (d);
49230 
49231     case V16HImode:
49232     case V32QImode:
49233       return expand_vec_perm_even_odd_pack (d);
49234 
49235     case V64QImode:
49236       return expand_vec_perm_even_odd_trunc (d);
49237 
49238     case V4DImode:
49239       if (!TARGET_AVX2)
49240 	{
49241 	  struct expand_vec_perm_d d_copy = *d;
49242 	  d_copy.vmode = V4DFmode;
49243 	  if (d->testing_p)
49244 	    d_copy.target = gen_lowpart (V4DFmode, d->target);
49245 	  else
49246 	    d_copy.target = gen_reg_rtx (V4DFmode);
49247 	  d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
49248 	  d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
49249 	  if (expand_vec_perm_even_odd_1 (&d_copy, odd))
49250 	    {
49251 	      if (!d->testing_p)
49252 		emit_move_insn (d->target,
49253 				gen_lowpart (V4DImode, d_copy.target));
49254 	      return true;
49255 	    }
49256 	  return false;
49257 	}
49258 
49259       if (d->testing_p)
49260 	break;
49261 
49262       t1 = gen_reg_rtx (V4DImode);
49263       t2 = gen_reg_rtx (V4DImode);
49264 
49265       /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
49266       emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
49267       emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
49268 
49269       /* Now a vpunpck[lh]qdq will produce the result required.  */
49270       if (odd)
49271 	t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
49272       else
49273 	t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
49274       emit_insn (t3);
49275       break;
49276 
49277     case V8SImode:
49278       if (!TARGET_AVX2)
49279 	{
49280 	  struct expand_vec_perm_d d_copy = *d;
49281 	  d_copy.vmode = V8SFmode;
49282 	  if (d->testing_p)
49283 	    d_copy.target = gen_lowpart (V8SFmode, d->target);
49284 	  else
49285 	    d_copy.target = gen_reg_rtx (V8SFmode);
49286 	  d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
49287 	  d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
49288 	  if (expand_vec_perm_even_odd_1 (&d_copy, odd))
49289 	    {
49290 	      if (!d->testing_p)
49291 		emit_move_insn (d->target,
49292 				gen_lowpart (V8SImode, d_copy.target));
49293 	      return true;
49294 	    }
49295 	  return false;
49296 	}
49297 
49298       if (d->testing_p)
49299 	break;
49300 
49301       t1 = gen_reg_rtx (V8SImode);
49302       t2 = gen_reg_rtx (V8SImode);
49303       t3 = gen_reg_rtx (V4DImode);
49304       t4 = gen_reg_rtx (V4DImode);
49305       t5 = gen_reg_rtx (V4DImode);
49306 
49307       /* Shuffle the lanes around into
49308 	 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }.  */
49309       emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
49310 				    gen_lowpart (V4DImode, d->op1),
49311 				    GEN_INT (0x20)));
49312       emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
49313 				    gen_lowpart (V4DImode, d->op1),
49314 				    GEN_INT (0x31)));
49315 
49316       /* Swap the 2nd and 3rd position in each lane into
49317 	 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
49318       emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
49319 				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49320       emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
49321 				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49322 
49323       /* Now a vpunpck[lh]qdq will produce
49324 	 { 0 2 4 6 8 a c e } or { 1 3 5 7 9 b d f } respectively.  */
49325       if (odd)
49326 	t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
49327 					   gen_lowpart (V4DImode, t2));
49328       else
49329 	t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
49330 					  gen_lowpart (V4DImode, t2));
49331       emit_insn (t3);
49332       emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
49333       break;
49334 
49335     default:
49336       gcc_unreachable ();
49337     }
49338 
49339   return true;
49340 }
49341 
49342 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
49343    extract-even and extract-odd permutations.  */
49344 
49345 static bool
49346 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
49347 {
49348   unsigned i, odd, nelt = d->nelt;
49349 
49350   odd = d->perm[0];
49351   if (odd != 0 && odd != 1)
49352     return false;
49353 
49354   for (i = 1; i < nelt; ++i)
49355     if (d->perm[i] != 2 * i + odd)
49356       return false;
49357 
49358   return expand_vec_perm_even_odd_1 (d, odd);
49359 }
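
/* Illustrative sketch (not part of the original source): the pattern
   match above accepts exactly the permutations {odd, odd+2, odd+4,
   ...} with odd in {0, 1}.  Hypothetical standalone form.  */
#if 0
static int
match_even_odd (const unsigned char *perm, unsigned nelt)
{
  unsigned i, odd = perm[0];
  if (odd > 1)
    return -1;
  for (i = 1; i < nelt; ++i)
    if (perm[i] != 2 * i + odd)
      return -1;
  return (int) odd;	/* 0 = extract-even, 1 = extract-odd */
}
#endif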
49360 
49361 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
49362    permutations.  We assume that expand_vec_perm_1 has already failed.  */
49363 
49364 static bool
49365 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
49366 {
49367   unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
49368   machine_mode vmode = d->vmode;
49369   unsigned char perm2[4];
49370   rtx op0 = d->op0, dest;
49371   bool ok;
49372 
49373   switch (vmode)
49374     {
49375     case V4DFmode:
49376     case V8SFmode:
49377       /* These are special-cased in sse.md so that we can optionally
49378 	 use the vbroadcast instruction.  They expand to two insns
49379 	 if the input happens to be in a register.  */
49380       gcc_unreachable ();
49381 
49382     case V2DFmode:
49383     case V2DImode:
49384     case V4SFmode:
49385     case V4SImode:
49386       /* These are always implementable using standard shuffle patterns.  */
49387       gcc_unreachable ();
49388 
49389     case V8HImode:
49390     case V16QImode:
49391       /* These can be implemented via interleave.  We save one insn by
49392 	 stopping once we have promoted to V4SImode and then using pshufd.  */
49393       if (d->testing_p)
49394 	return true;
49395       do
49396 	{
49397 	  rtx dest;
49398 	  rtx (*gen) (rtx, rtx, rtx)
49399 	    = vmode == V16QImode ? gen_vec_interleave_lowv16qi
49400 				 : gen_vec_interleave_lowv8hi;
49401 
49402 	  if (elt >= nelt2)
49403 	    {
49404 	      gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
49405 				       : gen_vec_interleave_highv8hi;
49406 	      elt -= nelt2;
49407 	    }
49408 	  nelt2 /= 2;
49409 
49410 	  dest = gen_reg_rtx (vmode);
49411 	  emit_insn (gen (dest, op0, op0));
49412 	  vmode = get_mode_wider_vector (vmode);
49413 	  op0 = gen_lowpart (vmode, dest);
49414 	}
49415       while (vmode != V4SImode);
49416 
49417       memset (perm2, elt, 4);
49418       dest = gen_reg_rtx (V4SImode);
49419       ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
49420       gcc_assert (ok);
49421       if (!d->testing_p)
49422 	emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
49423       return true;
49424 
49425     case V64QImode:
49426     case V32QImode:
49427     case V16HImode:
49428     case V8SImode:
49429     case V4DImode:
49430       /* For AVX2 broadcasts of the first element vpbroadcast* or
49431 	 vpermq should be used by expand_vec_perm_1.  */
49432       gcc_assert (!TARGET_AVX2 || d->perm[0]);
49433       return false;
49434 
49435     default:
49436       gcc_unreachable ();
49437     }
49438 }
49439 
49440 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
49441    broadcast permutations.  */
49442 
49443 static bool
49444 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
49445 {
49446   unsigned i, elt, nelt = d->nelt;
49447 
49448   if (!d->one_operand_p)
49449     return false;
49450 
49451   elt = d->perm[0];
49452   for (i = 1; i < nelt; ++i)
49453     if (d->perm[i] != elt)
49454       return false;
49455 
49456   return expand_vec_perm_broadcast_1 (d);
49457 }
49458 
49459 /* Implement arbitrary permutations of two V64QImode operands
49460    with 2 vpermi2w, 2 vpshufb and one vpor instruction.  */
49461 static bool
49462 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
49463 {
49464   if (!TARGET_AVX512BW || d->vmode != V64QImode)
49465     return false;
49466 
49467   if (d->testing_p)
49468     return true;
49469 
49470   struct expand_vec_perm_d ds[2];
49471   rtx rperm[128], vperm, target0, target1;
49472   unsigned int i, nelt;
49473   machine_mode vmode;
49474 
49475   nelt = d->nelt;
49476   vmode = V64QImode;
49477 
49478   for (i = 0; i < 2; i++)
49479     {
49480       ds[i] = *d;
49481       ds[i].vmode = V32HImode;
49482       ds[i].nelt = 32;
49483       ds[i].target = gen_reg_rtx (V32HImode);
49484       ds[i].op0 = gen_lowpart (V32HImode, d->op0);
49485       ds[i].op1 = gen_lowpart (V32HImode, d->op1);
49486     }
49487 
49488   /* Prepare permutations such that the first one (ds[0]) takes
49489      care of putting the even bytes into the right positions or
49490      one position higher, and the second one (ds[1]) takes care
49491      of putting the odd bytes into the right positions or one
49492      position lower.  */
49493 
49494   for (i = 0; i < nelt; i++)
49495     {
49496       ds[i & 1].perm[i / 2] = d->perm[i] / 2;
49497       if (i & 1)
49498 	{
49499 	  rperm[i] = constm1_rtx;
49500 	  rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49501 	}
49502       else
49503 	{
49504 	  rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49505 	  rperm[i + 64] = constm1_rtx;
49506 	}
49507     }
49508 
49509   bool ok = expand_vec_perm_1 (&ds[0]);
49510   gcc_assert (ok);
49511   ds[0].target = gen_lowpart (V64QImode, ds[0].target);
49512 
49513   ok = expand_vec_perm_1 (&ds[1]);
49514   gcc_assert (ok);
49515   ds[1].target = gen_lowpart (V64QImode, ds[1].target);
49516 
49517   vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
49518   vperm = force_reg (vmode, vperm);
49519   target0 = gen_reg_rtx (V64QImode);
49520   emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
49521 
49522   vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
49523   vperm = force_reg (vmode, vperm);
49524   target1 = gen_reg_rtx (V64QImode);
49525   emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
49526 
49527   emit_insn (gen_iorv64qi3 (d->target, target0, target1));
49528   return true;
49529 }
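
/* Illustrative sketch (not part of the original source): how the
   V64QImode byte permutation above splits across the two V32HImode
   vpermi2w operations.  Byte i of the result comes from word
   d->perm[i] / 2; ds[0] fetches the words feeding the even result
   bytes and ds[1] those feeding the odd ones, after which the two
   vpshufb masks pick the correct byte of each fetched word.  The
   helper name is hypothetical.  */
#if 0
static void
split_byte_perm (const unsigned char *perm, unsigned nelt,
		 unsigned char even_words[32], unsigned char odd_words[32])
{
  unsigned i;
  for (i = 0; i < nelt; ++i)
    {
      if (i & 1)
	odd_words[i / 2] = perm[i] / 2;
      else
	even_words[i / 2] = perm[i] / 2;
    }
}
#endif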
49530 
49531 /* Implement an arbitrary permutation of two V32QImode or V16HImode operands
49532    with 4 vpshufb insns, 2 vpermq and 3 vpor.  We should have already failed
49533    all the shorter instruction sequences.  */
49534 
49535 static bool
49536 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
49537 {
49538   rtx rperm[4][32], vperm, l[2], h[2], op, m128;
49539   unsigned int i, nelt, eltsz;
49540   bool used[4];
49541 
49542   if (!TARGET_AVX2
49543       || d->one_operand_p
49544       || (d->vmode != V32QImode && d->vmode != V16HImode))
49545     return false;
49546 
49547   if (d->testing_p)
49548     return true;
49549 
49550   nelt = d->nelt;
49551   eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
49552 
49553   /* Generate 4 permutation masks.  If the required element is within
49554      the same lane, it is shuffled in.  If the required element is from
49555      the other lane, force a zero by setting bit 7 in the permutation
49556      mask (pshufb zeroes a destination byte whose mask byte has bit 7
49557      set).  The other mask has non-negative elements when an element is
49558      requested from the other lane, but also moved to the other lane, so
49559      that the result of vpshufb can have the two V2TImode halves swapped.  */
49560   m128 = GEN_INT (-128);
49561   for (i = 0; i < 32; ++i)
49562     {
49563       rperm[0][i] = m128;
49564       rperm[1][i] = m128;
49565       rperm[2][i] = m128;
49566       rperm[3][i] = m128;
49567     }
49568   used[0] = false;
49569   used[1] = false;
49570   used[2] = false;
49571   used[3] = false;
49572   for (i = 0; i < nelt; ++i)
49573     {
49574       unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49575       unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
49576       unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
49577 
49578       for (j = 0; j < eltsz; ++j)
49579 	rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
49580       used[which] = true;
49581     }
49582 
49583   for (i = 0; i < 2; ++i)
49584     {
49585       if (!used[2 * i + 1])
49586 	{
49587 	  h[i] = NULL_RTX;
49588 	  continue;
49589 	}
49590       vperm = gen_rtx_CONST_VECTOR (V32QImode,
49591 				    gen_rtvec_v (32, rperm[2 * i + 1]));
49592       vperm = force_reg (V32QImode, vperm);
49593       h[i] = gen_reg_rtx (V32QImode);
49594       op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49595       emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
49596     }
49597 
49598   /* Swap the 128-bit lanes of h[X].  */
49599   for (i = 0; i < 2; ++i)
49600     {
49601       if (h[i] == NULL_RTX)
49602 	continue;
49603       op = gen_reg_rtx (V4DImode);
49604       emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
49605 				      const2_rtx, GEN_INT (3), const0_rtx,
49606 				      const1_rtx));
49607       h[i] = gen_lowpart (V32QImode, op);
49608     }
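/* The 2, 3, 0, 1 qword selector used above swaps the two 128-bit halves
   of the V4DImode value, which is what lets the cross-lane bytes land in
   their proper lane before the final vpor.  */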
49609 
49610   for (i = 0; i < 2; ++i)
49611     {
49612       if (!used[2 * i])
49613 	{
49614 	  l[i] = NULL_RTX;
49615 	  continue;
49616 	}
49617       vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49618       vperm = force_reg (V32QImode, vperm);
49619       l[i] = gen_reg_rtx (V32QImode);
49620       op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49621       emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
49622     }
49623 
49624   for (i = 0; i < 2; ++i)
49625     {
49626       if (h[i] && l[i])
49627 	{
49628 	  op = gen_reg_rtx (V32QImode);
49629 	  emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49630 	  l[i] = op;
49631 	}
49632       else if (h[i])
49633 	l[i] = h[i];
49634     }
49635 
49636   gcc_assert (l[0] && l[1]);
49637   op = d->target;
49638   if (d->vmode != V32QImode)
49639     op = gen_reg_rtx (V32QImode);
49640   emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49641   if (op != d->target)
49642     emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49643   return true;
49644 }
49645 
49646 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49647    With all of the interface bits taken care of, perform the expansion
49648    in D and return true on success.  */
49649 
49650 static bool
49651 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49652 {
49653   /* Try a single instruction expansion.  */
49654   if (expand_vec_perm_1 (d))
49655     return true;
49656 
49657   /* Try sequences of two instructions.  */
49658 
49659   if (expand_vec_perm_pshuflw_pshufhw (d))
49660     return true;
49661 
49662   if (expand_vec_perm_palignr (d, false))
49663     return true;
49664 
49665   if (expand_vec_perm_interleave2 (d))
49666     return true;
49667 
49668   if (expand_vec_perm_broadcast (d))
49669     return true;
49670 
49671   if (expand_vec_perm_vpermq_perm_1 (d))
49672     return true;
49673 
49674   if (expand_vec_perm_vperm2f128 (d))
49675     return true;
49676 
49677   if (expand_vec_perm_pblendv (d))
49678     return true;
49679 
49680   /* Try sequences of three instructions.  */
49681 
49682   if (expand_vec_perm_even_odd_pack (d))
49683     return true;
49684 
49685   if (expand_vec_perm_2vperm2f128_vshuf (d))
49686     return true;
49687 
49688   if (expand_vec_perm_pshufb2 (d))
49689     return true;
49690 
49691   if (expand_vec_perm_interleave3 (d))
49692     return true;
49693 
49694   if (expand_vec_perm_vperm2f128_vblend (d))
49695     return true;
49696 
49697   /* Try sequences of four instructions.  */
49698 
49699   if (expand_vec_perm_even_odd_trunc (d))
49700     return true;
49701   if (expand_vec_perm_vpshufb2_vpermq (d))
49702     return true;
49703 
49704   if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49705     return true;
49706 
49707   if (expand_vec_perm_vpermi2_vpshub2 (d))
49708     return true;
49709 
49710   /* ??? Look for narrow permutations whose element orderings would
49711      allow the promotion to a wider mode.  */
49712 
49713   /* ??? Look for sequences of interleave or a wider permute that place
49714      the data into the correct lanes for a half-vector shuffle like
49715      pshuf[lh]w or vpermilps.  */
49716 
49717   /* ??? Look for sequences of interleave that produce the desired results.
49718      The combinatorics of punpck[lh] get pretty ugly... */
49719 
49720   if (expand_vec_perm_even_odd (d))
49721     return true;
49722 
49723   /* Even longer sequences.  */
49724   if (expand_vec_perm_vpshufb4_vpermq2 (d))
49725     return true;
49726 
49727   return false;
49728 }
49729 
49730 /* If a permutation only uses one operand, make that explicit.  Returns
49731    true if the permutation references both operands.  */
49732 
49733 static bool
49734 canonicalize_perm (struct expand_vec_perm_d *d)
49735 {
49736   int i, which, nelt = d->nelt;
49737 
49738   for (i = which = 0; i < nelt; ++i)
49739     which |= (d->perm[i] < nelt ? 1 : 2);
49740 
49741   d->one_operand_p = true;
49742   switch (which)
49743     {
49744     default:
49745       gcc_unreachable ();
49746 
49747     case 3:
49748       if (!rtx_equal_p (d->op0, d->op1))
49749         {
49750 	  d->one_operand_p = false;
49751 	  break;
49752         }
49753       /* The elements of PERM do not suggest that only the first operand
49754 	 is used, but both operands are identical.  Allow easier matching
49755 	 of the permutation by folding the permutation into the single
49756 	 input vector.  */
49757       /* FALLTHRU */
49758 
49759     case 2:
49760       for (i = 0; i < nelt; ++i)
49761         d->perm[i] &= nelt - 1;
49762       d->op0 = d->op1;
49763       break;
49764 
49765     case 1:
49766       d->op1 = d->op0;
49767       break;
49768     }
49769 
49770   return (which == 3);
49771 }
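/* An illustrative call: for V4SImode with op0 == op1 and
   perm = { 0, 5, 2, 7 }, WHICH is 3, but because the operands are
   identical the selector is folded to { 0, 1, 2, 3 } over a single
   input; one_operand_p remains true while the function still returns
   true, since the original selector referenced both inputs.  */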
49772 
49773 bool
49774 ix86_expand_vec_perm_const (rtx operands[4])
49775 {
49776   struct expand_vec_perm_d d;
49777   unsigned char perm[MAX_VECT_LEN];
49778   int i, nelt;
49779   bool two_args;
49780   rtx sel;
49781 
49782   d.target = operands[0];
49783   d.op0 = operands[1];
49784   d.op1 = operands[2];
49785   sel = operands[3];
49786 
49787   d.vmode = GET_MODE (d.target);
49788   gcc_assert (VECTOR_MODE_P (d.vmode));
49789   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49790   d.testing_p = false;
49791 
49792   gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49793   gcc_assert (XVECLEN (sel, 0) == nelt);
49794   gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
49795 
49796   for (i = 0; i < nelt; ++i)
49797     {
49798       rtx e = XVECEXP (sel, 0, i);
49799       int ei = INTVAL (e) & (2 * nelt - 1);
49800       d.perm[i] = ei;
49801       perm[i] = ei;
49802     }
49803 
49804   two_args = canonicalize_perm (&d);
49805 
49806   if (ix86_expand_vec_perm_const_1 (&d))
49807     return true;
49808 
49809   /* If the selector says both arguments are needed, but the operands are the
49810      same, the above tried to expand with one_operand_p and flattened selector.
49811      If that didn't work, retry without one_operand_p; that is how the
49812      permutation was validated by the vec_perm_const_ok hook.  */
49813   if (two_args && d.one_operand_p)
49814     {
49815       d.one_operand_p = false;
49816       memcpy (d.perm, perm, sizeof (perm));
49817       return ix86_expand_vec_perm_const_1 (&d);
49818     }
49819 
49820   return false;
49821 }
49822 
49823 /* Implement targetm.vectorize.vec_perm_const_ok.  */
49824 
49825 static bool
49826 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49827 				  const unsigned char *sel)
49828 {
49829   struct expand_vec_perm_d d;
49830   unsigned int i, nelt, which;
49831   bool ret;
49832 
49833   d.vmode = vmode;
49834   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49835   d.testing_p = true;
49836 
49837   /* Given sufficient ISA support we can just return true here
49838      for selected vector modes.  */
49839   switch (d.vmode)
49840     {
49841     case V16SFmode:
49842     case V16SImode:
49843     case V8DImode:
49844     case V8DFmode:
49845       if (TARGET_AVX512F)
49846 	/* All implementable with a single vpermi2 insn.  */
49847 	return true;
49848       break;
49849     case V32HImode:
49850       if (TARGET_AVX512BW)
49851 	/* All implementable with a single vpermi2 insn.  */
49852 	return true;
49853       break;
49854     case V64QImode:
49855       if (TARGET_AVX512BW)
49856 	/* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn.  */
49857 	return true;
49858       break;
49859     case V8SImode:
49860     case V8SFmode:
49861     case V4DFmode:
49862     case V4DImode:
49863       if (TARGET_AVX512VL)
49864 	/* All implementable with a single vpermi2 insn.  */
49865 	return true;
49866       break;
49867     case V16HImode:
49868       if (TARGET_AVX2)
49869 	/* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns.  */
49870 	return true;
49871       break;
49872     case V32QImode:
49873       if (TARGET_AVX2)
49874 	/* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns.  */
49875 	return true;
49876       break;
49877     case V4SImode:
49878     case V4SFmode:
49879     case V8HImode:
49880     case V16QImode:
49881       /* All implementable with a single vpperm insn.  */
49882       if (TARGET_XOP)
49883 	return true;
49884       /* All implementable with 2 pshufb + 1 ior.  */
49885       if (TARGET_SSSE3)
49886 	return true;
49887       break;
49888     case V2DImode:
49889     case V2DFmode:
49890       /* All implementable with shufpd or unpck[lh]pd.  */
49891       return true;
49892     default:
49893       return false;
49894     }
49895 
49896   /* Extract the values from the vector CST into the permutation
49897      array in D.  */
49898   memcpy (d.perm, sel, nelt);
49899   for (i = which = 0; i < nelt; ++i)
49900     {
49901       unsigned char e = d.perm[i];
49902       gcc_assert (e < 2 * nelt);
49903       which |= (e < nelt ? 1 : 2);
49904     }
49905 
49906   /* For all elements from second vector, fold the elements to first.  */
49907   if (which == 2)
49908     for (i = 0; i < nelt; ++i)
49909       d.perm[i] -= nelt;
49910 
49911   /* Check whether the mask can be applied to the vector type.  */
49912   d.one_operand_p = (which != 3);
49913 
49914   /* Implementable with shufps or pshufd.  */
49915   if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49916     return true;
49917 
49918   /* Otherwise we have to go through the motions and see if we can
49919      figure out how to generate the requested permutation.  */
49920   d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49921   d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49922   if (!d.one_operand_p)
49923     d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49924 
49925   start_sequence ();
49926   ret = ix86_expand_vec_perm_const_1 (&d);
49927   end_sequence ();
49928 
49929   return ret;
49930 }
49931 
49932 void
49933 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49934 {
49935   struct expand_vec_perm_d d;
49936   unsigned i, nelt;
49937 
49938   d.target = targ;
49939   d.op0 = op0;
49940   d.op1 = op1;
49941   d.vmode = GET_MODE (targ);
49942   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49943   d.one_operand_p = false;
49944   d.testing_p = false;
49945 
49946   for (i = 0; i < nelt; ++i)
49947     d.perm[i] = i * 2 + odd;
49948 
49949   /* We'll either be able to implement the permutation directly...  */
49950   if (expand_vec_perm_1 (&d))
49951     return;
49952 
49953   /* ... or we use the special-case patterns.  */
49954   expand_vec_perm_even_odd_1 (&d, odd);
49955 }
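/* E.g. for V4SImode with ODD == 1 this builds the selector
   { 1, 3, 5, 7 }, i.e. the odd SImode elements taken across the
   OP0/OP1 pair.  */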
49956 
49957 static void
49958 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49959 {
49960   struct expand_vec_perm_d d;
49961   unsigned i, nelt, base;
49962   bool ok;
49963 
49964   d.target = targ;
49965   d.op0 = op0;
49966   d.op1 = op1;
49967   d.vmode = GET_MODE (targ);
49968   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49969   d.one_operand_p = false;
49970   d.testing_p = false;
49971 
49972   base = high_p ? nelt / 2 : 0;
49973   for (i = 0; i < nelt / 2; ++i)
49974     {
49975       d.perm[i * 2] = i + base;
49976       d.perm[i * 2 + 1] = i + base + nelt;
49977     }
49978 
49979   /* Note that for AVX this isn't one instruction.  */
49980   ok = ix86_expand_vec_perm_const_1 (&d);
49981   gcc_assert (ok);
49982 }
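/* E.g. for V4SImode with HIGH_P false this builds the selector
   { 0, 4, 1, 5 }, the classic interleave-low pattern.  */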
49983 
49984 
49985 /* Expand a vector operation CODE for a V*QImode in terms of the
49986    same operation on V*HImode.  */
49987 
49988 void
49989 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49990 {
49991   machine_mode qimode = GET_MODE (dest);
49992   machine_mode himode;
49993   rtx (*gen_il) (rtx, rtx, rtx);
49994   rtx (*gen_ih) (rtx, rtx, rtx);
49995   rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49996   struct expand_vec_perm_d d;
49997   bool ok, full_interleave;
49998   bool uns_p = false;
49999   int i;
50000 
50001   switch (qimode)
50002     {
50003     case V16QImode:
50004       himode = V8HImode;
50005       gen_il = gen_vec_interleave_lowv16qi;
50006       gen_ih = gen_vec_interleave_highv16qi;
50007       break;
50008     case V32QImode:
50009       himode = V16HImode;
50010       gen_il = gen_avx2_interleave_lowv32qi;
50011       gen_ih = gen_avx2_interleave_highv32qi;
50012       break;
50013     case V64QImode:
50014       himode = V32HImode;
50015       gen_il = gen_avx512bw_interleave_lowv64qi;
50016       gen_ih = gen_avx512bw_interleave_highv64qi;
50017       break;
50018     default:
50019       gcc_unreachable ();
50020     }
50021 
50022   op2_l = op2_h = op2;
50023   switch (code)
50024     {
50025     case MULT:
50026       /* Unpack data such that we've got a source byte in each low byte of
50027 	 each word.  We don't care what goes into the high byte of each word.
50028 	 Rather than trying to get zero in there, most convenient is to let
50029 	 it be a copy of the low byte.  */
50030       op2_l = gen_reg_rtx (qimode);
50031       op2_h = gen_reg_rtx (qimode);
50032       emit_insn (gen_il (op2_l, op2, op2));
50033       emit_insn (gen_ih (op2_h, op2, op2));
50034       /* FALLTHRU */
50035 
50036       op1_l = gen_reg_rtx (qimode);
50037       op1_h = gen_reg_rtx (qimode);
50038       emit_insn (gen_il (op1_l, op1, op1));
50039       emit_insn (gen_ih (op1_h, op1, op1));
50040       full_interleave = qimode == V16QImode;
50041       break;
50042 
50043     case ASHIFT:
50044     case LSHIFTRT:
50045       uns_p = true;
50046       /* FALLTHRU */
50047     case ASHIFTRT:
50048       op1_l = gen_reg_rtx (himode);
50049       op1_h = gen_reg_rtx (himode);
50050       ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
50051       ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
50052       full_interleave = true;
50053       break;
50054     default:
50055       gcc_unreachable ();
50056     }
50057 
50058   /* Perform the operation.  */
50059   res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
50060 			       1, OPTAB_DIRECT);
50061   res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
50062 			       1, OPTAB_DIRECT);
50063   gcc_assert (res_l && res_h);
50064 
50065   /* Merge the data back into the right place.  */
50066   d.target = dest;
50067   d.op0 = gen_lowpart (qimode, res_l);
50068   d.op1 = gen_lowpart (qimode, res_h);
50069   d.vmode = qimode;
50070   d.nelt = GET_MODE_NUNITS (qimode);
50071   d.one_operand_p = false;
50072   d.testing_p = false;
50073 
50074   if (full_interleave)
50075     {
50076       /* For SSE2, we used a full interleave, so the desired
50077 	 results are in the even elements.  */
50078       for (i = 0; i < d.nelt; ++i)
50079 	d.perm[i] = i * 2;
50080     }
50081   else
50082     {
50083       /* For AVX, the interleave used above was not cross-lane.  So the
50084 	 extraction is evens but with the second and third quarter swapped.
50085 	 Happily, that is even one insn shorter than even extraction.
50086 	 For AVX512BW we have 4 lanes.  We extract evens from within a lane,
50087 	 always first from the first and then from the second source operand,
50088 	 the index bits above the low 4 bits remain the same.
50089 	 Thus, for d.nelt == 32 we want permutation
50090 	 0,2,4,..14, 32,34,36,..46, 16,18,20,..30, 48,50,52,..62
50091 	 and for d.nelt == 64 we want permutation
50092 	 0,2,4,..14, 64,66,68,..78, 16,18,20,..30, 80,82,84,..94,
50093 	 32,34,36,..46, 96,98,100,..110, 48,50,52,..62, 112,114,116,..126.  */
50094       for (i = 0; i < d.nelt; ++i)
50095 	d.perm[i] = ((i * 2) & 14) + ((i & 8) ? d.nelt : 0) + (i & ~15);
50096     }
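/* Spot-checking the formula above (illustrative): for d.nelt == 32,
   i == 8 yields (16 & 14) + 32 + 0 == 32, and i == 16 yields
   (32 & 14) + 0 + 16 == 16, matching the permutation listed in the
   comment.  */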
50097 
50098   ok = ix86_expand_vec_perm_const_1 (&d);
50099   gcc_assert (ok);
50100 
50101   set_unique_reg_note (get_last_insn (), REG_EQUAL,
50102 		       gen_rtx_fmt_ee (code, qimode, op1, op2));
50103 }
50104 
50105 /* Helper function of ix86_expand_mul_widen_evenodd.  Return true
50106    if op is CONST_VECTOR with all odd elements equal to their
50107    preceding element.  */
50108 
50109 static bool
50110 const_vector_equal_evenodd_p (rtx op)
50111 {
50112   machine_mode mode = GET_MODE (op);
50113   int i, nunits = GET_MODE_NUNITS (mode);
50114   if (GET_CODE (op) != CONST_VECTOR
50115       || nunits != CONST_VECTOR_NUNITS (op))
50116     return false;
50117   for (i = 0; i < nunits; i += 2)
50118     if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
50119       return false;
50120   return true;
50121 }
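/* E.g. a V4SImode CONST_VECTOR { 5, 5, 7, 7 } satisfies this
   predicate, while { 5, 6, 7, 7 } does not.  */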
50122 
50123 void
50124 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
50125 			       bool uns_p, bool odd_p)
50126 {
50127   machine_mode mode = GET_MODE (op1);
50128   machine_mode wmode = GET_MODE (dest);
50129   rtx x;
50130   rtx orig_op1 = op1, orig_op2 = op2;
50131 
50132   if (!nonimmediate_operand (op1, mode))
50133     op1 = force_reg (mode, op1);
50134   if (!nonimmediate_operand (op2, mode))
50135     op2 = force_reg (mode, op2);
50136 
50137   /* We only play even/odd games with vectors of SImode.  */
50138   gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
50139 
50140   /* If we're looking for the odd results, shift those members down to
50141      the even slots.  For some cpus this is faster than a PSHUFD.  */
50142   if (odd_p)
50143     {
50144       /* For XOP use vpmacsdqh, but only for smult, as it is only
50145 	 signed.  */
50146       if (TARGET_XOP && mode == V4SImode && !uns_p)
50147 	{
50148 	  x = force_reg (wmode, CONST0_RTX (wmode));
50149 	  emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
50150 	  return;
50151 	}
50152 
50153       x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
50154       if (!const_vector_equal_evenodd_p (orig_op1))
50155 	op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
50156 			    x, NULL, 1, OPTAB_DIRECT);
50157       if (!const_vector_equal_evenodd_p (orig_op2))
50158 	op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
50159 			    x, NULL, 1, OPTAB_DIRECT);
50160       op1 = gen_lowpart (mode, op1);
50161       op2 = gen_lowpart (mode, op2);
50162     }
50163 
50164   if (mode == V16SImode)
50165     {
50166       if (uns_p)
50167 	x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
50168       else
50169 	x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
50170     }
50171   else if (mode == V8SImode)
50172     {
50173       if (uns_p)
50174 	x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
50175       else
50176 	x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
50177     }
50178   else if (uns_p)
50179     x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
50180   else if (TARGET_SSE4_1)
50181     x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
50182   else
50183     {
50184       rtx s1, s2, t0, t1, t2;
50185 
50186       /* The easiest way to implement this without PMULDQ is to go through
50187 	 the motions as if we are performing a full 64-bit multiply.  With
50188 	 the exception that we need to do less shuffling of the elements.  */
50189 
50190       /* Compute the sign-extension, aka highparts, of the two operands.  */
50191       s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
50192 				op1, pc_rtx, pc_rtx);
50193       s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
50194 				op2, pc_rtx, pc_rtx);
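      /* A sketch of the algebra used here: with a = a_u - 2^32 * (a < 0)
	 and b = b_u - 2^32 * (b < 0), one has, modulo 2^64,
	 a * b == a_u * b_u - 2^32 * ((a < 0) ? b_u : 0)
		  - 2^32 * ((b < 0) ? a_u : 0),
	 and S1/S2 are the all-ones masks that select those correction
	 terms; the shift by 32 below supplies the 2^32 factor.  */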
50195 
50196       /* Multiply LO(A) * HI(B), and vice-versa.  */
50197       t1 = gen_reg_rtx (wmode);
50198       t2 = gen_reg_rtx (wmode);
50199       emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
50200       emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
50201 
50202       /* Multiply LO(A) * LO(B).  */
50203       t0 = gen_reg_rtx (wmode);
50204       emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
50205 
50206       /* Combine and shift the highparts into place.  */
50207       t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
50208       t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
50209 			 1, OPTAB_DIRECT);
50210 
50211       /* Combine high and low parts.  */
50212       force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
50213       return;
50214     }
50215   emit_insn (x);
50216 }
50217 
50218 void
50219 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
50220 			    bool uns_p, bool high_p)
50221 {
50222   machine_mode wmode = GET_MODE (dest);
50223   machine_mode mode = GET_MODE (op1);
50224   rtx t1, t2, t3, t4, mask;
50225 
50226   switch (mode)
50227     {
50228     case V4SImode:
50229       t1 = gen_reg_rtx (mode);
50230       t2 = gen_reg_rtx (mode);
50231       if (TARGET_XOP && !uns_p)
50232 	{
50233 	  /* With XOP, we have pmacsdqh, aka mul_widen_odd.  In this case,
50234 	     shuffle the elements once so that all elements are in the right
50235 	     place for immediate use: { A C B D }.  */
50236 	  emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
50237 					const1_rtx, GEN_INT (3)));
50238 	  emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
50239 					const1_rtx, GEN_INT (3)));
50240 	}
50241       else
50242 	{
50243 	  /* Put the elements into place for the multiply.  */
50244 	  ix86_expand_vec_interleave (t1, op1, op1, high_p);
50245 	  ix86_expand_vec_interleave (t2, op2, op2, high_p);
50246 	  high_p = false;
50247 	}
50248       ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
50249       break;
50250 
50251     case V8SImode:
50252       /* Shuffle the elements between the lanes.  After this we
50253 	 have { A B E F | C D G H } for each operand.  */
50254       t1 = gen_reg_rtx (V4DImode);
50255       t2 = gen_reg_rtx (V4DImode);
50256       emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
50257 				      const0_rtx, const2_rtx,
50258 				      const1_rtx, GEN_INT (3)));
50259       emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
50260 				      const0_rtx, const2_rtx,
50261 				      const1_rtx, GEN_INT (3)));
50262 
50263       /* Shuffle the elements within the lanes.  After this we
50264 	 have { A A B B | C C D D } or { E E F F | G G H H }.  */
50265       t3 = gen_reg_rtx (V8SImode);
50266       t4 = gen_reg_rtx (V8SImode);
50267       mask = GEN_INT (high_p
50268 		      ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
50269 		      : 0 + (0 << 2) + (1 << 4) + (1 << 6));
50270       emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
50271       emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
50272 
50273       ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
50274       break;
50275 
50276     case V8HImode:
50277     case V16HImode:
50278       t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
50279 			 uns_p, OPTAB_DIRECT);
50280       t2 = expand_binop (mode,
50281 			 uns_p ? umul_highpart_optab : smul_highpart_optab,
50282 			 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
50283       gcc_assert (t1 && t2);
50284 
50285       t3 = gen_reg_rtx (mode);
50286       ix86_expand_vec_interleave (t3, t1, t2, high_p);
50287       emit_move_insn (dest, gen_lowpart (wmode, t3));
50288       break;
50289 
50290     case V16QImode:
50291     case V32QImode:
50292     case V32HImode:
50293     case V16SImode:
50294     case V64QImode:
50295       t1 = gen_reg_rtx (wmode);
50296       t2 = gen_reg_rtx (wmode);
50297       ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
50298       ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
50299 
50300       emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
50301       break;
50302 
50303     default:
50304       gcc_unreachable ();
50305     }
50306 }
50307 
50308 void
50309 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
50310 {
50311   rtx res_1, res_2, res_3, res_4;
50312 
50313   res_1 = gen_reg_rtx (V4SImode);
50314   res_2 = gen_reg_rtx (V4SImode);
50315   res_3 = gen_reg_rtx (V2DImode);
50316   res_4 = gen_reg_rtx (V2DImode);
50317   ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
50318   ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
50319 
50320   /* Move the results in element 2 down to element 1; we don't care
50321      what goes in elements 2 and 3.  Then we can merge the parts
50322      back together with an interleave.
50323 
50324      Note that two other sequences were tried:
50325      (1) Use interleaves at the start instead of psrldq, which allows
50326      us to use a single shufps to merge things back at the end.
50327      (2) Use shufps here to combine the two vectors, then pshufd to
50328      put the elements in the correct order.
50329      In both cases the cost of the reformatting stall was too high
50330      and the overall sequence slower.  */
50331 
50332   emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
50333 				const0_rtx, const2_rtx,
50334 				const0_rtx, const0_rtx));
50335   emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
50336 				const0_rtx, const2_rtx,
50337 				const0_rtx, const0_rtx));
50338   res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
50339 
50340   set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
50341 }
50342 
50343 void
50344 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
50345 {
50346   machine_mode mode = GET_MODE (op0);
50347   rtx t1, t2, t3, t4, t5, t6;
50348 
50349   if (TARGET_AVX512DQ && mode == V8DImode)
50350     emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
50351   else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
50352     emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
50353   else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
50354     emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
50355   else if (TARGET_XOP && mode == V2DImode)
50356     {
50357       /* op1: A,B,C,D, op2: E,F,G,H */
50358       op1 = gen_lowpart (V4SImode, op1);
50359       op2 = gen_lowpart (V4SImode, op2);
50360 
50361       t1 = gen_reg_rtx (V4SImode);
50362       t2 = gen_reg_rtx (V4SImode);
50363       t3 = gen_reg_rtx (V2DImode);
50364       t4 = gen_reg_rtx (V2DImode);
50365 
50366       /* t1: B,A,D,C */
50367       emit_insn (gen_sse2_pshufd_1 (t1, op1,
50368 				    GEN_INT (1),
50369 				    GEN_INT (0),
50370 				    GEN_INT (3),
50371 				    GEN_INT (2)));
50372 
50373       /* t2: (B*E),(A*F),(D*G),(C*H) */
50374       emit_insn (gen_mulv4si3 (t2, t1, op2));
50375 
50376       /* t3: (B*E)+(A*F), (D*G)+(C*H) */
50377       emit_insn (gen_xop_phadddq (t3, t2));
50378 
50379       /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
50380       emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
50381 
50382       /* Multiply the low parts and add everything together.  */
50383       t5 = gen_reg_rtx (V2DImode);
50384       emit_insn (gen_vec_widen_umult_even_v4si (t5,
50385 					gen_lowpart (V4SImode, op1),
50386 					gen_lowpart (V4SImode, op2)));
50387       op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
50388 
50389     }
50390   else
50391     {
50392       machine_mode nmode;
50393       rtx (*umul) (rtx, rtx, rtx);
50394 
50395       if (mode == V2DImode)
50396 	{
50397 	  umul = gen_vec_widen_umult_even_v4si;
50398 	  nmode = V4SImode;
50399 	}
50400       else if (mode == V4DImode)
50401 	{
50402 	  umul = gen_vec_widen_umult_even_v8si;
50403 	  nmode = V8SImode;
50404 	}
50405       else if (mode == V8DImode)
50406 	{
50407 	  umul = gen_vec_widen_umult_even_v16si;
50408 	  nmode = V16SImode;
50409 	}
50410       else
50411 	gcc_unreachable ();
50412 
50413 
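      /* A sketch of the decomposition implemented below: writing
	 a = 2^32 * a_hi + a_lo and b = 2^32 * b_hi + b_lo gives, modulo
	 2^64, a * b == a_lo * b_lo + 2^32 * (a_hi * b_lo + b_hi * a_lo),
	 so three widening 32x32 multiplies, an add and a shift suffice.  */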
50414       /* Multiply low parts.  */
50415       t1 = gen_reg_rtx (mode);
50416       emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
50417 
50418       /* Shift input vectors right 32 bits so we can multiply high parts.  */
50419       t6 = GEN_INT (32);
50420       t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
50421       t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
50422 
50423       /* Multiply high parts by low parts.  */
50424       t4 = gen_reg_rtx (mode);
50425       t5 = gen_reg_rtx (mode);
50426       emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
50427       emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
50428 
50429       /* Combine and shift the highparts back.  */
50430       t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
50431       t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
50432 
50433       /* Combine high and low parts.  */
50434       force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
50435     }
50436 
50437   set_unique_reg_note (get_last_insn (), REG_EQUAL,
50438 		       gen_rtx_MULT (mode, op1, op2));
50439 }
50440 
50441 /* Return true if control transfer instruction INSN
50442    should be encoded with a bnd prefix.
50443    If INSN is NULL then return true when control
50444    transfer instructions should be prefixed with
50445    bnd by default for the current function.  */
50446 
50447 bool
50448 ix86_bnd_prefixed_insn_p (rtx insn)
50449 {
50450   /* For call insns check special flag.  */
50451   if (insn && CALL_P (insn))
50452     {
50453       rtx call = get_call_rtx_from (insn);
50454       if (call)
50455 	return CALL_EXPR_WITH_BOUNDS_P (call);
50456     }
50457 
50458   /* All other insns are prefixed only if function is instrumented.  */
50459   return chkp_function_instrumented_p (current_function_decl);
50460 }
50461 
50462 /* Calculate integer abs() using only SSE2 instructions.  */
50463 
50464 void
50465 ix86_expand_sse2_abs (rtx target, rtx input)
50466 {
50467   machine_mode mode = GET_MODE (target);
50468   rtx tmp0, tmp1, x;
50469 
50470   switch (mode)
50471     {
50472       /* For 32-bit signed integer X, the best way to calculate the absolute
50473 	 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)).  */
50474       case V4SImode:
50475 	tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
50476 				    GEN_INT (GET_MODE_BITSIZE
50477 					     (GET_MODE_INNER (mode)) - 1),
50478 				    NULL, 0, OPTAB_DIRECT);
50479 	tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
50480 				    NULL, 0, OPTAB_DIRECT);
50481 	x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
50482 				 target, 0, OPTAB_DIRECT);
50483 	break;
50484 
50485       /* For 16-bit signed integer X, the best way to calculate the absolute
50486 	 value of X is max (X, -X), as SSE2 provides the PMAXSW insn.  */
50487       case V8HImode:
50488 	tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50489 
50490 	x = expand_simple_binop (mode, SMAX, tmp0, input,
50491 				 target, 0, OPTAB_DIRECT);
50492 	break;
50493 
50494       /* For 8-bit signed integer X, the best way to calculate the absolute
50495 	 value of X is min ((unsigned char) X, (unsigned char) (-X)),
50496 	 as SSE2 provides the PMINUB insn.  */
50497       case V16QImode:
50498 	tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50499 
50500 	x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
50501 				 target, 0, OPTAB_DIRECT);
50502 	break;
50503 
50504       default:
50505 	gcc_unreachable ();
50506     }
50507 
50508   if (x != target)
50509     emit_move_insn (target, x);
50510 }
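/* A worked instance of the V4SImode identity (illustrative): for
   X == -5, X >> 31 == -1, (-1 ^ -5) == 4 and 4 - (-1) == 5, the
   absolute value.  */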
50511 
50512 /* Expand an insert into a vector register through pinsr insn.
50513    Return true if successful.  */
50514 
50515 bool
50516 ix86_expand_pinsr (rtx *operands)
50517 {
50518   rtx dst = operands[0];
50519   rtx src = operands[3];
50520 
50521   unsigned int size = INTVAL (operands[1]);
50522   unsigned int pos = INTVAL (operands[2]);
50523 
50524   if (GET_CODE (src) == SUBREG)
50525     {
50526       /* Reject non-lowpart subregs.  */
50527       if (SUBREG_BYTE (src) != 0)
50528 	return false;
50529       src = SUBREG_REG (src);
50530     }
50531 
50532   if (GET_CODE (dst) == SUBREG)
50533     {
50534       pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
50535       dst = SUBREG_REG (dst);
50536     }
50537 
50538   switch (GET_MODE (dst))
50539     {
50540     case V16QImode:
50541     case V8HImode:
50542     case V4SImode:
50543     case V2DImode:
50544       {
50545 	machine_mode srcmode, dstmode;
50546 	rtx (*pinsr)(rtx, rtx, rtx, rtx);
50547 
50548 	srcmode = mode_for_size (size, MODE_INT, 0);
50549 
50550 	switch (srcmode)
50551 	  {
50552 	  case QImode:
50553 	    if (!TARGET_SSE4_1)
50554 	      return false;
50555 	    dstmode = V16QImode;
50556 	    pinsr = gen_sse4_1_pinsrb;
50557 	    break;
50558 
50559 	  case HImode:
50560 	    if (!TARGET_SSE2)
50561 	      return false;
50562 	    dstmode = V8HImode;
50563 	    pinsr = gen_sse2_pinsrw;
50564 	    break;
50565 
50566 	  case SImode:
50567 	    if (!TARGET_SSE4_1)
50568 	      return false;
50569 	    dstmode = V4SImode;
50570 	    pinsr = gen_sse4_1_pinsrd;
50571 	    break;
50572 
50573 	  case DImode:
50574 	    gcc_assert (TARGET_64BIT);
50575 	    if (!TARGET_SSE4_1)
50576 	      return false;
50577 	    dstmode = V2DImode;
50578 	    pinsr = gen_sse4_1_pinsrq;
50579 	    break;
50580 
50581 	  default:
50582 	    return false;
50583 	  }
50584 
50585 	/* Reject insertions to misaligned positions.  */
50586 	if (pos & (size-1))
50587 	  return false;
50588 
50589 	rtx d = dst;
50590 	if (GET_MODE (dst) != dstmode)
50591 	  d = gen_reg_rtx (dstmode);
50592 	src = gen_lowpart (srcmode, src);
50593 
50594 	pos /= size;
50595 
50596 	emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
50597 			  GEN_INT (1 << pos)));
50598 	if (d != dst)
50599 	  emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50600 	return true;
50601       }
50602 
50603     default:
50604       return false;
50605     }
50606 }
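/* E.g. (illustrative): inserting an HImode value at bit position 32 of
   a V8HImode register gives size == 16 and pos == 32; the alignment
   check passes, pos becomes 2, and the insn emitted is pinsrw with
   selector GEN_INT (1 << 2).  */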
50607 
50608 /* This function returns the calling-ABI-specific va_list type node.
50609    It returns the FNDECL-specific va_list type.  */
50610 
50611 static tree
50612 ix86_fn_abi_va_list (tree fndecl)
50613 {
50614   if (!TARGET_64BIT)
50615     return va_list_type_node;
50616   gcc_assert (fndecl != NULL_TREE);
50617 
50618   if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50619     return ms_va_list_type_node;
50620   else
50621     return sysv_va_list_type_node;
50622 }
50623 
50624 /* Returns the canonical va_list type specified by TYPE. If there
50625    is no valid TYPE provided, it returns NULL_TREE.  */
50626 
50627 static tree
50628 ix86_canonical_va_list_type (tree type)
50629 {
50630   tree wtype, htype;
50631 
50632   /* Resolve references and pointers to va_list type.  */
50633   if (TREE_CODE (type) == MEM_REF)
50634     type = TREE_TYPE (type);
50635   else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
50636     type = TREE_TYPE (type);
50637   else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
50638     type = TREE_TYPE (type);
50639 
50640   if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50641     {
50642       wtype = va_list_type_node;
50643       gcc_assert (wtype != NULL_TREE);
50644       htype = type;
50645       if (TREE_CODE (wtype) == ARRAY_TYPE)
50646 	{
50647 	  /* If va_list is an array type, the argument may have decayed
50648 	     to a pointer type, e.g. by being passed to another function.
50649 	     In that case, unwrap both types so that we can compare the
50650 	     underlying records.  */
50651 	  if (TREE_CODE (htype) == ARRAY_TYPE
50652 	      || POINTER_TYPE_P (htype))
50653 	    {
50654 	      wtype = TREE_TYPE (wtype);
50655 	      htype = TREE_TYPE (htype);
50656 	    }
50657 	}
50658       if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50659 	return va_list_type_node;
50660       wtype = sysv_va_list_type_node;
50661       gcc_assert (wtype != NULL_TREE);
50662       htype = type;
50663       if (TREE_CODE (wtype) == ARRAY_TYPE)
50664 	{
50665 	  /* If va_list is an array type, the argument may have decayed
50666 	     to a pointer type, e.g. by being passed to another function.
50667 	     In that case, unwrap both types so that we can compare the
50668 	     underlying records.  */
50669 	  if (TREE_CODE (htype) == ARRAY_TYPE
50670 	      || POINTER_TYPE_P (htype))
50671 	    {
50672 	      wtype = TREE_TYPE (wtype);
50673 	      htype = TREE_TYPE (htype);
50674 	    }
50675 	}
50676       if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50677 	return sysv_va_list_type_node;
50678       wtype = ms_va_list_type_node;
50679       gcc_assert (wtype != NULL_TREE);
50680       htype = type;
50681       if (TREE_CODE (wtype) == ARRAY_TYPE)
50682 	{
50683 	  /* If va_list is an array type, the argument may have decayed
50684 	     to a pointer type, e.g. by being passed to another function.
50685 	     In that case, unwrap both types so that we can compare the
50686 	     underlying records.  */
50687 	  if (TREE_CODE (htype) == ARRAY_TYPE
50688 	      || POINTER_TYPE_P (htype))
50689 	    {
50690 	      wtype = TREE_TYPE (wtype);
50691 	      htype = TREE_TYPE (htype);
50692 	    }
50693 	}
50694       if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50695 	return ms_va_list_type_node;
50696       return NULL_TREE;
50697     }
50698   return std_canonical_va_list_type (type);
50699 }
50700 
50701 /* Iterate through the target-specific builtin types for va_list.
50702    IDX denotes the iterator, *PTREE is set to the result type of
50703    the va_list builtin, and *PNAME to its internal type.
50704    Returns zero if there is no element for this index, otherwise
50705    IDX should be increased upon the next call.
50706    Note, do not iterate a base builtin's name like __builtin_va_list.
50707    Used from c_common_nodes_and_builtins.  */
50708 
50709 static int
50710 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50711 {
50712   if (TARGET_64BIT)
50713     {
50714       switch (idx)
50715 	{
50716 	default:
50717 	  break;
50718 
50719 	case 0:
50720 	  *ptree = ms_va_list_type_node;
50721 	  *pname = "__builtin_ms_va_list";
50722 	  return 1;
50723 
50724 	case 1:
50725 	  *ptree = sysv_va_list_type_node;
50726 	  *pname = "__builtin_sysv_va_list";
50727 	  return 1;
50728 	}
50729     }
50730 
50731   return 0;
50732 }
50733 
50734 #undef TARGET_SCHED_DISPATCH
50735 #define TARGET_SCHED_DISPATCH has_dispatch
50736 #undef TARGET_SCHED_DISPATCH_DO
50737 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50738 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50739 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50740 #undef TARGET_SCHED_REORDER
50741 #define TARGET_SCHED_REORDER ix86_sched_reorder
50742 #undef TARGET_SCHED_ADJUST_PRIORITY
50743 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50744 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50745 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50746   ix86_dependencies_evaluation_hook
50747 
50748 /* The size of the dispatch window is the total number of bytes of
50749    object code allowed in a window.  */
50750 #define DISPATCH_WINDOW_SIZE 16
50751 
50752 /* Number of dispatch windows considered for scheduling.  */
50753 #define MAX_DISPATCH_WINDOWS 3
50754 
50755 /* Maximum number of instructions in a window.  */
50756 #define MAX_INSN 4
50757 
50758 /* Maximum number of immediate operands in a window.  */
50759 #define MAX_IMM 4
50760 
50761 /* Maximum number of immediate bits allowed in a window.  */
50762 #define MAX_IMM_SIZE 128
50763 
50764 /* Maximum number of 32 bit immediates allowed in a window.  */
50765 #define MAX_IMM_32 4
50766 
50767 /* Maximum number of 64 bit immediates allowed in a window.  */
50768 #define MAX_IMM_64 2
50769 
50770 /* Maximum total of loads or prefetches allowed in a window.  */
50771 #define MAX_LOAD 2
50772 
50773 /* Maximum total of stores allowed in a window.  */
50774 #define MAX_STORE 1
50775 
50776 #undef BIG
50777 #define BIG 100
50778 
50779 
50780 /* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
50781 enum dispatch_group {
50782   disp_no_group = 0,
50783   disp_load,
50784   disp_store,
50785   disp_load_store,
50786   disp_prefetch,
50787   disp_imm,
50788   disp_imm_32,
50789   disp_imm_64,
50790   disp_branch,
50791   disp_cmp,
50792   disp_jcc,
50793   disp_last
50794 };
50795 
50796 /* Number of allowable groups in a dispatch window.  It is an array
50797    indexed by dispatch_group enum.  100 is used as a big number,
50798    because the number of these kinds of operations does not have any
50799    effect on the dispatch window, but we need them for other reasons in
50800    the table.  */
50801 static unsigned int num_allowable_groups[disp_last] = {
50802   0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
50803 };
50804 
50805 char group_name[disp_last + 1][16] = {
50806   "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50807   "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50808   "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50809 };
50810 
50811 /* Instruction path.  */
50812 enum insn_path {
50813   no_path = 0,
50814   path_single, /* Single micro op.  */
50815   path_double, /* Double micro op.  */
50816   path_multi,  /* Instructions with more than 2 micro ops.  */
50817   last_path
50818 };
50819 
50820 /* sched_insn_info defines a window to the instructions scheduled in
50821    the basic block.  It contains a pointer to the insn_info table and
50822    the instruction scheduled.
50823 
50824    Windows are allocated for each basic block and are linked
50825    together.  */
50826 typedef struct sched_insn_info_s {
50827   rtx insn;
50828   enum dispatch_group group;
50829   enum insn_path path;
50830   int byte_len;
50831   int imm_bytes;
50832 } sched_insn_info;
50833 
50834 /* Linked list of dispatch windows.  This is a two way list of
50835    dispatch windows of a basic block.  It contains information about
50836    the number of uops in the window and the total number of
50837    instructions and of bytes in the object code for this dispatch
50838    window.  */
50839 typedef struct dispatch_windows_s {
50840   int num_insn;            /* Number of insns in the window.  */
50841   int num_uops;            /* Number of uops in the window.  */
50842   int window_size;         /* Number of bytes in the window.  */
50843   int window_num;          /* Window number, 0 or 1.  */
50844   int num_imm;             /* Number of immediates in the window.  */
50845   int num_imm_32;          /* Number of 32 bit immediates in the window.  */
50846   int num_imm_64;          /* Number of 64 bit immediates in the window.  */
50847   int imm_size;            /* Total size in bytes of immediates in the window.  */
50848   int num_loads;           /* Total memory loads in the window.  */
50849   int num_stores;          /* Total memory stores in the window.  */
50850   int violation;           /* Violation exists in window.  */
50851   sched_insn_info *window; /* Pointer to the window.  */
50852   struct dispatch_windows_s *next;
50853   struct dispatch_windows_s *prev;
50854 } dispatch_windows;
50855 
50856 /* Immediate values used in an insn.  */
50857 typedef struct imm_info_s
50858   {
50859     int imm;
50860     int imm32;
50861     int imm64;
50862   } imm_info;
50863 
50864 static dispatch_windows *dispatch_window_list;
50865 static dispatch_windows *dispatch_window_list1;
50866 
50867 /* Get dispatch group of insn.  */
50868 
50869 static enum dispatch_group
50870 get_mem_group (rtx_insn *insn)
50871 {
50872   enum attr_memory memory;
50873 
50874   if (INSN_CODE (insn) < 0)
50875     return disp_no_group;
50876   memory = get_attr_memory (insn);
50877   if (memory == MEMORY_STORE)
50878     return disp_store;
50879 
50880   if (memory == MEMORY_LOAD)
50881     return disp_load;
50882 
50883   if (memory == MEMORY_BOTH)
50884     return disp_load_store;
50885 
50886   return disp_no_group;
50887 }
50888 
50889 /* Return true if insn is a compare instruction.  */
50890 
50891 static bool
50892 is_cmp (rtx_insn *insn)
50893 {
50894   enum attr_type type;
50895 
50896   type = get_attr_type (insn);
50897   return (type == TYPE_TEST
50898 	  || type == TYPE_ICMP
50899 	  || type == TYPE_FCMP
50900 	  || GET_CODE (PATTERN (insn)) == COMPARE);
50901 }
50902 
50903 /* Return true if a dispatch violation was encountered.  */
50904 
50905 static bool
50906 dispatch_violation (void)
50907 {
50908   if (dispatch_window_list->next)
50909     return dispatch_window_list->next->violation;
50910   return dispatch_window_list->violation;
50911 }
50912 
50913 /* Return true if insn is a branch instruction.  */
50914 
50915 static bool
50916 is_branch (rtx insn)
50917 {
50918   return (CALL_P (insn) || JUMP_P (insn));
50919 }
50920 
50921 /* Return true if insn is a prefetch instruction.  */
50922 
50923 static bool
50924 is_prefetch (rtx insn)
50925 {
50926   return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50927 }
50928 
50929 /* This function initializes a dispatch window and the list container holding a
50930    pointer to the window.  */
50931 
50932 static void
50933 init_window (int window_num)
50934 {
50935   int i;
50936   dispatch_windows *new_list;
50937 
50938   if (window_num == 0)
50939     new_list = dispatch_window_list;
50940   else
50941     new_list = dispatch_window_list1;
50942 
50943   new_list->num_insn = 0;
50944   new_list->num_uops = 0;
50945   new_list->window_size = 0;
50946   new_list->next = NULL;
50947   new_list->prev = NULL;
50948   new_list->window_num = window_num;
50949   new_list->num_imm = 0;
50950   new_list->num_imm_32 = 0;
50951   new_list->num_imm_64 = 0;
50952   new_list->imm_size = 0;
50953   new_list->num_loads = 0;
50954   new_list->num_stores = 0;
50955   new_list->violation = false;
50956 
50957   for (i = 0; i < MAX_INSN; i++)
50958     {
50959       new_list->window[i].insn = NULL;
50960       new_list->window[i].group = disp_no_group;
50961       new_list->window[i].path = no_path;
50962       new_list->window[i].byte_len = 0;
50963       new_list->window[i].imm_bytes = 0;
50964     }
50965   return;
50966 }
50967 
50968 /* This function allocates and initializes a dispatch window and the
50969    list container holding a pointer to the window.  */
50970 
50971 static dispatch_windows *
50972 allocate_window (void)
50973 {
50974   dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
50975   new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50976 
50977   return new_list;
50978 }
50979 
50980 /* This routine initializes the dispatch scheduling information.  It
50981    initiates building dispatch scheduler tables and constructs the
50982    first dispatch window.  */
50983 
50984 static void
50985 init_dispatch_sched (void)
50986 {
50987   /* Allocate a dispatch list and a window.  */
50988   dispatch_window_list = allocate_window ();
50989   dispatch_window_list1 = allocate_window ();
50990   init_window (0);
50991   init_window (1);
50992 }
50993 
50994 /* This function returns true if a branch is detected.  End of a basic block
50995    does not have to be a branch, but here we assume only branches end a
50996    window.  */
50997 
50998 static bool
50999 is_end_basic_block (enum dispatch_group group)
51000 {
51001   return group == disp_branch;
51002 }
51003 
51004 /* This function is called when the end of a window processing is reached.  */
51005 
51006 static void
51007 process_end_window (void)
51008 {
51009   gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
51010   if (dispatch_window_list->next)
51011     {
51012       gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
51013       gcc_assert (dispatch_window_list->window_size
51014 		  + dispatch_window_list1->window_size <= 48);
51015       init_window (1);
51016     }
51017   init_window (0);
51018 }
51019 
51020 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
51021    WINDOW_NUM is either 0 or 1.  A maximum of two windows are generated
51022    for 48 bytes of instructions.  Note that these windows are not dispatch
51023    windows whose size is DISPATCH_WINDOW_SIZE.  */
51024 
51025 static dispatch_windows *
51026 allocate_next_window (int window_num)
51027 {
51028   if (window_num == 0)
51029     {
51030       if (dispatch_window_list->next)
51031 	init_window (1);
51032       init_window (0);
51033       return dispatch_window_list;
51034     }
51035 
51036   dispatch_window_list->next = dispatch_window_list1;
51037   dispatch_window_list1->prev = dispatch_window_list;
51038 
51039   return dispatch_window_list1;
51040 }
51041 
51042 /* Compute number of immediate operands of an instruction.  */
51043 
51044 static void
51045 find_constant (rtx in_rtx, imm_info *imm_values)
51046 {
51047   if (INSN_P (in_rtx))
51048     in_rtx = PATTERN (in_rtx);
51049   subrtx_iterator::array_type array;
51050   FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
51051     if (const_rtx x = *iter)
51052       switch (GET_CODE (x))
51053 	{
51054 	case CONST:
51055 	case SYMBOL_REF:
51056 	case CONST_INT:
51057 	  (imm_values->imm)++;
51058 	  if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
51059 	    (imm_values->imm32)++;
51060 	  else
51061 	    (imm_values->imm64)++;
51062 	  break;
51063 
51064 	case CONST_DOUBLE:
51065 	  (imm_values->imm)++;
51066 	  (imm_values->imm64)++;
51067 	  break;
51068 
51069 	case CODE_LABEL:
51070 	  if (LABEL_KIND (x) == LABEL_NORMAL)
51071 	    {
51072 	      (imm_values->imm)++;
51073 	      (imm_values->imm32)++;
51074 	    }
51075 	  break;
51076 
51077 	default:
51078 	  break;
51079 	}
51080 }
51081 
51082 /* Return total size of immediate operands of an instruction along with number
51083    of corresponding immediate operands.  It initializes its parameters to zero
51084    before calling FIND_CONSTANT.
51085    INSN is the input instruction.  IMM is the total of immediates.
51086    IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
51087    bit immediates.  */
51088 
51089 static int
51090 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
51091 {
51092   imm_info imm_values = {0, 0, 0};
51093 
51094   find_constant (insn, &imm_values);
51095   *imm = imm_values.imm;
51096   *imm32 = imm_values.imm32;
51097   *imm64 = imm_values.imm64;
51098   return imm_values.imm32 * 4 + imm_values.imm64 * 8;
51099 }
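/* E.g. (illustrative): an insn with a single SImode-representable
   constant yields *imm == 1, *imm32 == 1, *imm64 == 0 and a return
   value of 4 bytes.  */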
51100 
51101 /* This function indicates whether an instruction has any immediate
51102    operands.  */
51103 
51104 static bool
51105 has_immediate (rtx insn)
51106 {
51107   int num_imm_operand;
51108   int num_imm32_operand;
51109   int num_imm64_operand;
51110 
51111   if (insn)
51112     return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51113 			       &num_imm64_operand);
51114   return false;
51115 }
51116 
51117 /* Return single or double path for instructions.  */
51118 
51119 static enum insn_path
51120 get_insn_path (rtx_insn *insn)
51121 {
51122   enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
51123 
51124   if ((int)path == 0)
51125     return path_single;
51126 
51127   if ((int)path == 1)
51128     return path_double;
51129 
51130   return path_multi;
51131 }
51132 
51133 /* Return insn dispatch group.  */
51134 
51135 static enum dispatch_group
51136 get_insn_group (rtx_insn *insn)
51137 {
51138   enum dispatch_group group = get_mem_group (insn);
51139   if (group)
51140     return group;
51141 
51142   if (is_branch (insn))
51143     return disp_branch;
51144 
51145   if (is_cmp (insn))
51146     return disp_cmp;
51147 
51148   if (has_immediate (insn))
51149     return disp_imm;
51150 
51151   if (is_prefetch (insn))
51152     return disp_prefetch;
51153 
51154   return disp_no_group;
51155 }
51156 
51157 /* Count number of GROUP restricted instructions in a dispatch
51158    window WINDOW_LIST.  */
51159 
51160 static int
51161 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
51162 {
51163   enum dispatch_group group = get_insn_group (insn);
51164   int imm_size;
51165   int num_imm_operand;
51166   int num_imm32_operand;
51167   int num_imm64_operand;
51168 
51169   if (group == disp_no_group)
51170     return 0;
51171 
51172   if (group == disp_imm)
51173     {
51174       imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51175 			      &num_imm64_operand);
51176       if (window_list->imm_size + imm_size > MAX_IMM_SIZE
51177 	  || num_imm_operand + window_list->num_imm > MAX_IMM
51178 	  || (num_imm32_operand > 0
51179 	      && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
51180 		  || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
51181 	  || (num_imm64_operand > 0
51182 	      && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
51183 		  || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
51184 	  || (window_list->imm_size + imm_size == MAX_IMM_SIZE
51185 	      && num_imm64_operand > 0
51186 	      && ((window_list->num_imm_64 > 0
51187 		   && window_list->num_insn >= 2)
51188 		  || window_list->num_insn >= 3)))
51189 	return BIG;
51190 
51191       return 1;
51192     }
51193 
51194   if ((group == disp_load_store
51195        && (window_list->num_loads >= MAX_LOAD
51196 	   || window_list->num_stores >= MAX_STORE))
51197       || ((group == disp_load
51198 	   || group == disp_prefetch)
51199 	  && window_list->num_loads >= MAX_LOAD)
51200       || (group == disp_store
51201 	  && window_list->num_stores >= MAX_STORE))
51202     return BIG;
51203 
51204   return 1;
51205 }
51206 
51207 /* This function returns true if insn satisfies dispatch rules on the
51208    last window scheduled.  */
51209 
51210 static bool
51211 fits_dispatch_window (rtx_insn *insn)
51212 {
51213   dispatch_windows *window_list = dispatch_window_list;
51214   dispatch_windows *window_list_next = dispatch_window_list->next;
51215   unsigned int num_restrict;
51216   enum dispatch_group group = get_insn_group (insn);
51217   enum insn_path path = get_insn_path (insn);
51218   int sum;
51219 
51220   /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
51221      instructions should be given the lowest priority in the
51222      scheduling process in the Haifa scheduler to make sure they will be
51223      scheduled in the same dispatch window as the reference to them.  */
51224   if (group == disp_jcc || group == disp_cmp)
51225     return false;
51226 
51227   /* Check nonrestricted.  */
51228   if (group == disp_no_group || group == disp_branch)
51229     return true;
51230 
51231   /* Get last dispatch window.  */
51232   if (window_list_next)
51233     window_list = window_list_next;
51234 
51235   if (window_list->window_num == 1)
51236     {
51237       sum = window_list->prev->window_size + window_list->window_size;
51238 
51239       if (sum == 32
51240 	  || (min_insn_size (insn) + sum) >= 48)
51241 	/* Window 1 is full.  Go for next window.  */
51242 	return true;
51243     }
51244 
51245   num_restrict = count_num_restricted (insn, window_list);
51246 
51247   if (num_restrict > num_allowable_groups[group])
51248     return false;
51249 
51250   /* See if it fits in the first window.  */
51251   if (window_list->window_num == 0)
51252     {
51253       /* The first window should contain only single- and double-path
51254 	 uops.  */
51255       if (path == path_double
51256 	  && (window_list->num_uops + 2) > MAX_INSN)
51257 	return false;
51258       else if (path != path_single)
51259         return false;
51260     }
51261   return true;
51262 }
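
/* An arithmetic reading of the window-1 check above, following the
   comment in add_to_dispatch_window below: two 16-byte windows form
   one 32-byte dispatch group, so SUM == 32 means windows 0 and 1 are
   completely full, while min_insn_size (insn) + SUM >= 48 means the
   new instruction would run past even a third window's worth of
   bytes.  E.g. with window 0 holding 16 bytes, window 1 holding 10
   and a 7-byte instruction, 26 + 7 = 33, so neither condition fires
   and the instruction is still considered for window 1.  */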
51263 
51264 /* Add an instruction INSN with NUM_UOPS micro-operations to the
51265    dispatch window WINDOW_LIST.  */
51266 
51267 static void
51268 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
51269 {
51270   int byte_len = min_insn_size (insn);
51271   int num_insn = window_list->num_insn;
51272   int imm_size;
51273   sched_insn_info *window = window_list->window;
51274   enum dispatch_group group = get_insn_group (insn);
51275   enum insn_path path = get_insn_path (insn);
51276   int num_imm_operand;
51277   int num_imm32_operand;
51278   int num_imm64_operand;
51279 
51280   if (!window_list->violation && group != disp_cmp
51281       && !fits_dispatch_window (insn))
51282     window_list->violation = true;
51283 
51284   imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51285 				 &num_imm64_operand);
51286 
51287   /* Initialize window with new instruction.  */
51288   window[num_insn].insn = insn;
51289   window[num_insn].byte_len = byte_len;
51290   window[num_insn].group = group;
51291   window[num_insn].path = path;
51292   window[num_insn].imm_bytes = imm_size;
51293 
51294   window_list->window_size += byte_len;
51295   window_list->num_insn = num_insn + 1;
51296   window_list->num_uops = window_list->num_uops + num_uops;
51297   window_list->imm_size += imm_size;
51298   window_list->num_imm += num_imm_operand;
51299   window_list->num_imm_32 += num_imm32_operand;
51300   window_list->num_imm_64 += num_imm64_operand;
51301 
51302   if (group == disp_store)
51303     window_list->num_stores += 1;
51304   else if (group == disp_load
51305 	   || group == disp_prefetch)
51306     window_list->num_loads += 1;
51307   else if (group == disp_load_store)
51308     {
51309       window_list->num_stores += 1;
51310       window_list->num_loads += 1;
51311     }
51312 }
51313 
51314 /* Add a scheduled instruction, INSN, to the current dispatch window.
51315    If the total bytes of instructions or the number of instructions in
51316    the window exceeds the allowable limit, allocate a new window.  */
51317 
51318 static void
51319 add_to_dispatch_window (rtx_insn *insn)
51320 {
51321   int byte_len;
51322   dispatch_windows *window_list;
51323   dispatch_windows *next_list;
51324   dispatch_windows *window0_list;
51325   enum insn_path path;
51326   enum dispatch_group insn_group;
51327   bool insn_fits;
51328   int num_insn;
51329   int num_uops;
51330   int window_num;
51331   int insn_num_uops;
51332   int sum;
51333 
51334   if (INSN_CODE (insn) < 0)
51335     return;
51336 
51337   byte_len = min_insn_size (insn);
51338   window_list = dispatch_window_list;
51339   next_list = window_list->next;
51340   path = get_insn_path (insn);
51341   insn_group = get_insn_group (insn);
51342 
51343   /* Get the last dispatch window.  */
51344   if (next_list)
51345     window_list = dispatch_window_list->next;
51346 
51347   if (path == path_single)
51348     insn_num_uops = 1;
51349   else if (path == path_double)
51350     insn_num_uops = 2;
51351   else
51352     insn_num_uops = (int) path;
51353 
51354   /* If the current window is full, get a new window.
51355      Window number zero is full if MAX_INSN uops are scheduled in it.
51356      Window number one is full if window zero's bytes plus window
51357      one's bytes equal 32, if adding the new instruction's bytes
51358      brings the total to 48 or more, or if it already holds MAX_INSN
51359      instructions.  */
51360   num_insn = window_list->num_insn;
51361   num_uops = window_list->num_uops;
51362   window_num = window_list->window_num;
51363   insn_fits = fits_dispatch_window (insn);
51364 
51365   if (num_insn >= MAX_INSN
51366       || num_uops + insn_num_uops > MAX_INSN
51367       || !insn_fits)
51368     {
51369       window_num = ~window_num & 1;
51370       window_list = allocate_next_window (window_num);
51371     }
51372 
51373   if (window_num == 0)
51374     {
51375       add_insn_window (insn, window_list, insn_num_uops);
51376       if (window_list->num_insn >= MAX_INSN
51377 	  && insn_group == disp_branch)
51378 	{
51379 	  process_end_window ();
51380 	  return;
51381 	}
51382     }
51383   else if (window_num == 1)
51384     {
51385       window0_list = window_list->prev;
51386       sum = window0_list->window_size + window_list->window_size;
51387       if (sum == 32
51388 	  || (byte_len + sum) >= 48)
51389 	{
51390 	  process_end_window ();
51391 	  window_list = dispatch_window_list;
51392 	}
51393 
51394       add_insn_window (insn, window_list, insn_num_uops);
51395     }
51396   else
51397     gcc_unreachable ();
51398 
51399   if (is_end_basic_block (insn_group))
51400     {
51401       /* End of basic block reached; do end-basic-block processing.  */
51402       process_end_window ();
51403       return;
51404     }
51405 }
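
/* The expression window_num = ~window_num & 1 above simply toggles
   between the two window numbers: ~0 & 1 == 1 and ~1 & 1 == 0, so a
   full window 0 hands off to window 1 and vice versa, matching the
   two-window dispatch group described in the comment above.  */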
51406 
51407 /* Print the dispatch window, WINDOW_NUM, to FILE.  */
51408 
51409 DEBUG_FUNCTION static void
51410 debug_dispatch_window_file (FILE *file, int window_num)
51411 {
51412   dispatch_windows *list;
51413   int i;
51414 
51415   if (window_num == 0)
51416     list = dispatch_window_list;
51417   else
51418     list = dispatch_window_list1;
51419 
51420   fprintf (file, "Window #%d:\n", list->window_num);
51421   fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
51422 	  list->num_insn, list->num_uops, list->window_size);
51423   fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51424 	   list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
51425 
51426   fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
51427 	  list->num_stores);
51428   fprintf (file, " insn info:\n");
51429 
51430   for (i = 0; i < MAX_INSN; i++)
51431     {
51432       if (!list->window[i].insn)
51433 	break;
51434       fprintf (file, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
51435 	      i, group_name[list->window[i].group],
51436 	      i, (void *)list->window[i].insn,
51437 	      i, list->window[i].path,
51438 	      i, list->window[i].byte_len,
51439 	      i, list->window[i].imm_bytes);
51440     }
51441 }
51442 
51443 /* Print to stdout a dispatch window.  */
51444 
51445 DEBUG_FUNCTION void
51446 debug_dispatch_window (int window_num)
51447 {
51448   debug_dispatch_window_file (stdout, window_num);
51449 }
51450 
51451 /* Print INSN dispatch information to FILE.  */
51452 
51453 DEBUG_FUNCTION static void
51454 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
51455 {
51456   int byte_len;
51457   enum insn_path path;
51458   enum dispatch_group group;
51459   int imm_size;
51460   int num_imm_operand;
51461   int num_imm32_operand;
51462   int num_imm64_operand;
51463 
51464   if (INSN_CODE (insn) < 0)
51465     return;
51466 
51467   byte_len = min_insn_size (insn);
51468   path = get_insn_path (insn);
51469   group = get_insn_group (insn);
51470   imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51471 				 &num_imm64_operand);
51472 
51473   fprintf (file, " insn info:\n");
51474   fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
51475 	   group_name[group], path, byte_len);
51476   fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51477 	   num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
51478 }
51479 
51480 /* Print to STDOUT the status of the ready list with respect to
51481    dispatch windows.  */
51482 
51483 DEBUG_FUNCTION void
51484 debug_ready_dispatch (void)
51485 {
51486   int i;
51487   int no_ready = number_in_ready ();
51488 
51489   fprintf (stdout, "Number of ready: %d\n", no_ready);
51490 
51491   for (i = 0; i < no_ready; i++)
51492     debug_insn_dispatch_info_file (stdout, get_ready_element (i));
51493 }
51494 
51495 /* This routine is the driver of the dispatch scheduler.  */
51496 
51497 static void
51498 do_dispatch (rtx_insn *insn, int mode)
51499 {
51500   if (mode == DISPATCH_INIT)
51501     init_dispatch_sched ();
51502   else if (mode == ADD_TO_DISPATCH_WINDOW)
51503     add_to_dispatch_window (insn);
51504 }
51505 
51506 /* Return TRUE if dispatch scheduling is supported and query ACTION holds for INSN.  */
51507 
51508 static bool
51509 has_dispatch (rtx_insn *insn, int action)
51510 {
51511   if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
51512       && flag_dispatch_scheduler)
51513     switch (action)
51514       {
51515       default:
51516 	return false;
51517 
51518       case IS_DISPATCH_ON:
51519 	return true;
51521 
51522       case IS_CMP:
51523 	return is_cmp (insn);
51524 
51525       case DISPATCH_VIOLATION:
51526 	return dispatch_violation ();
51527 
51528       case FITS_DISPATCH_WINDOW:
51529 	return fits_dispatch_window (insn);
51530       }
51531 
51532   return false;
51533 }
51534 
51535 /* Implementation of the reassociation_width target hook, used by the
51536    reassoc phase to identify the parallelism level of a reassociated
51537    tree.  The statement's tree_code is passed in OPC; the type of the
51538    arguments is passed in MODE.
51539 
51540    Currently parallel reassociation is enabled only for Atom
51541    processors, and we set the reassociation width to 2 because Atom
51542    may issue up to 2 instructions per cycle.
51543 
51544    The return value should be revisited if parallel reassociation is
51545    enabled for other processors.  */
51546 
51547 static int
51548 ix86_reassociation_width (unsigned int, machine_mode mode)
51549 {
51550   /* Vector part.  */
51551   if (VECTOR_MODE_P (mode))
51552     {
51553       if (TARGET_VECTOR_PARALLEL_EXECUTION)
51554 	return 2;
51555       else
51556 	return 1;
51557     }
51558 
51559   /* Scalar part.  */
51560   if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
51561     return 2;
51562   else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
51563     return 2;
51564   else
51565     return 1;
51566 }
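
/* For instance, a reassociation width of 2 lets the reassoc pass
   rewrite a chain such as ((a + b) + c) + d into the shape
   (a + b) + (c + d), exposing two independent additions that a
   2-issue core like Atom can execute in parallel.  */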
51567 
51568 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
51569    place emms and femms instructions.  */
51570 
51571 static machine_mode
51572 ix86_preferred_simd_mode (machine_mode mode)
51573 {
51574   if (!TARGET_SSE)
51575     return word_mode;
51576 
51577   switch (mode)
51578     {
51579     case QImode:
51580       return TARGET_AVX512BW ? V64QImode :
51581        (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
51582     case HImode:
51583       return TARGET_AVX512BW ? V32HImode :
51584        (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
51585     case SImode:
51586       return TARGET_AVX512F ? V16SImode :
51587 	(TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
51588     case DImode:
51589       return TARGET_AVX512F ? V8DImode :
51590 	(TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
51591 
51592     case SFmode:
51593       if (TARGET_AVX512F)
51594 	return V16SFmode;
51595       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51596 	return V8SFmode;
51597       else
51598 	return V4SFmode;
51599 
51600     case DFmode:
51601       if (!TARGET_VECTORIZE_DOUBLE)
51602 	return word_mode;
51603       else if (TARGET_AVX512F)
51604 	return V8DFmode;
51605       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51606 	return V4DFmode;
51607       else if (TARGET_SSE2)
51608 	return V2DFmode;
51609       /* FALLTHRU */
51610 
51611     default:
51612       return word_mode;
51613     }
51614 }
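
/* For example, when compiling with -mavx2 (TARGET_AVX set, AVX-512
   and -mprefer-avx128 not in effect), the switch above maps SImode to
   V8SImode and SFmode to V8SFmode: eight 32-bit lanes filling one
   256-bit ymm register.  */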
51615 
51616 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
51617    vectors.  If AVX512F is enabled then try vectorizing with 512bit,
51618    256bit and 128bit vectors.  */
51619 
51620 static unsigned int
51621 ix86_autovectorize_vector_sizes (void)
51622 {
51623   return TARGET_AVX512F ? 64 | 32 | 16 :
51624     (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
51625 }
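
/* The returned value is a bitmask of candidate vector sizes in bytes;
   with AVX-512F enabled it is 64 | 32 | 16 == 0x70, letting the
   vectorizer try 512-bit, 256-bit and 128-bit vectors.  A return
   value of 0 makes the vectorizer use only the preferred SIMD
   mode.  */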
51626 
51627 
51628 
51629 /* Return the class of registers that could be used to spill a pseudo
51630    of MODE and class RCLASS instead of memory.  Return NO_REGS if that
51631    is not possible or not profitable.  */
51632 static reg_class_t
51633 ix86_spill_class (reg_class_t rclass, machine_mode mode)
51634 {
51635   if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
51636       && (mode == SImode || (TARGET_64BIT && mode == DImode))
51637       && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51638     return ALL_SSE_REGS;
51639   return NO_REGS;
51640 }
51641 
51642 /* Implement targetm.vectorize.init_cost.  */
51643 
51644 static void *
51645 ix86_init_cost (struct loop *)
51646 {
51647   unsigned *cost = XNEWVEC (unsigned, 3);
51648   cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51649   return cost;
51650 }
51651 
51652 /* Implement targetm.vectorize.add_stmt_cost.  */
51653 
51654 static unsigned
51655 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51656 		    struct _stmt_vec_info *stmt_info, int misalign,
51657 		    enum vect_cost_model_location where)
51658 {
51659   unsigned *cost = (unsigned *) data;
51660   unsigned retval = 0;
51661 
51662   tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51663   int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51664 
51665   /* Statements in an inner loop relative to the loop being
51666      vectorized are weighted more heavily.  The value here is
51667      arbitrary and could potentially be improved with analysis.  */
51668   if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51669     count *= 50;  /* FIXME.  */
51670 
51671   retval = (unsigned) (count * stmt_cost);
51672 
51673   /* Multiply all vector stmt costs by 1.7 (an estimated factor) for
51674      Silvermont: it has an out-of-order integer pipeline that can execute
51675      2 scalar instructions per tick, but an in-order SIMD pipeline.  */
51676   if (TARGET_SILVERMONT || TARGET_INTEL)
51677     if (stmt_info && stmt_info->stmt)
51678       {
51679 	tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51680 	if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51681 	  retval = (retval * 17) / 10;
51682       }
51683 
51684   cost[where] += retval;
51685 
51686   return retval;
51687 }
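
/* Note that the 1.7 scaling is applied in integer arithmetic, so it
   truncates: with count == 1, a statement cost of 4 becomes
   (4 * 17) / 10 == 6, while a cost of 1 stays at 1.  */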
51688 
51689 /* Implement targetm.vectorize.finish_cost.  */
51690 
51691 static void
51692 ix86_finish_cost (void *data, unsigned *prologue_cost,
51693 		  unsigned *body_cost, unsigned *epilogue_cost)
51694 {
51695   unsigned *cost = (unsigned *) data;
51696   *prologue_cost = cost[vect_prologue];
51697   *body_cost     = cost[vect_body];
51698   *epilogue_cost = cost[vect_epilogue];
51699 }
51700 
51701 /* Implement targetm.vectorize.destroy_cost_data.  */
51702 
51703 static void
51704 ix86_destroy_cost_data (void *data)
51705 {
51706   free (data);
51707 }
51708 
51709 /* Validate target-specific memory model bits in VAL.  */
51710 
51711 static unsigned HOST_WIDE_INT
51712 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51713 {
51714   enum memmodel model = memmodel_from_int (val);
51715   bool strong;
51716 
51717   if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51718 				      |MEMMODEL_MASK)
51719       || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51720     {
51721       warning (OPT_Winvalid_memory_model,
51722 	       "Unknown architecture specific memory model");
51723       return MEMMODEL_SEQ_CST;
51724     }
51725   strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
51726   if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
51727     {
51728       warning (OPT_Winvalid_memory_model,
51729               "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51730       return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51731     }
51732   if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
51733     {
51734       warning (OPT_Winvalid_memory_model,
51735               "HLE_RELEASE not used with RELEASE or stronger memory model");
51736       return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51737     }
51738   return val;
51739 }
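
/* For reference, the HLE bits validated above reach this hook from
   user code via the __ATOMIC_HLE_ACQUIRE / __ATOMIC_HLE_RELEASE
   macros, e.g. in an elided spin lock (a minimal usage sketch, not
   code from this file):

     while (__atomic_exchange_n (&lock, 1,
				 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;
     ... critical section ...
     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Combining IX86_HLE_ACQUIRE with a model weaker than ACQUIRE (or
   IX86_HLE_RELEASE with one weaker than RELEASE) triggers the
   warnings above.  */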
51740 
51741 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51742    CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51743    CLONEI->simdlen.  Return 0 if SIMD clones shouldn't be emitted,
51744    or number of vecsize_mangle variants that should be emitted.  */
51745 
51746 static int
51747 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51748 					     struct cgraph_simd_clone *clonei,
51749 					     tree base_type, int num)
51750 {
51751   int ret = 1;
51752 
51753   if (clonei->simdlen
51754       && (clonei->simdlen < 2
51755 	  || clonei->simdlen > 16
51756 	  || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51757     {
51758       warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51759 		  "unsupported simdlen %d", clonei->simdlen);
51760       return 0;
51761     }
51762 
51763   tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51764   if (TREE_CODE (ret_type) != VOID_TYPE)
51765     switch (TYPE_MODE (ret_type))
51766       {
51767       case QImode:
51768       case HImode:
51769       case SImode:
51770       case DImode:
51771       case SFmode:
51772       case DFmode:
51773       /* case SCmode: */
51774       /* case DCmode: */
51775 	break;
51776       default:
51777 	warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51778 		    "unsupported return type %qT for simd", ret_type);
51779 	return 0;
51780       }
51781 
51782   tree t;
51783   int i;
51784 
51785   for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51786     /* FIXME: Shouldn't we allow such arguments if they are uniform?  */
51787     switch (TYPE_MODE (TREE_TYPE (t)))
51788       {
51789       case QImode:
51790       case HImode:
51791       case SImode:
51792       case DImode:
51793       case SFmode:
51794       case DFmode:
51795       /* case SCmode: */
51796       /* case DCmode: */
51797 	break;
51798       default:
51799 	warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51800 		    "unsupported argument type %qT for simd", TREE_TYPE (t));
51801 	return 0;
51802       }
51803 
51804   if (clonei->cilk_elemental)
51805     {
51806       /* Parse the processor clause here.  If not present, default to 'b'.  */
51807       clonei->vecsize_mangle = 'b';
51808     }
51809   else if (!TREE_PUBLIC (node->decl))
51810     {
51811       /* If the function isn't exported, we can pick up just one ISA
51812 	 for the clones.  */
51813       if (TARGET_AVX2)
51814 	clonei->vecsize_mangle = 'd';
51815       else if (TARGET_AVX)
51816 	clonei->vecsize_mangle = 'c';
51817       else
51818 	clonei->vecsize_mangle = 'b';
51819       ret = 1;
51820     }
51821   else
51822     {
51823       clonei->vecsize_mangle = "bcd"[num];
51824       ret = 3;
51825     }
51826   switch (clonei->vecsize_mangle)
51827     {
51828     case 'b':
51829       clonei->vecsize_int = 128;
51830       clonei->vecsize_float = 128;
51831       break;
51832     case 'c':
51833       clonei->vecsize_int = 128;
51834       clonei->vecsize_float = 256;
51835       break;
51836     case 'd':
51837       clonei->vecsize_int = 256;
51838       clonei->vecsize_float = 256;
51839       break;
51840     }
51841   if (clonei->simdlen == 0)
51842     {
51843       if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51844 	clonei->simdlen = clonei->vecsize_int;
51845       else
51846 	clonei->simdlen = clonei->vecsize_float;
51847       clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51848       if (clonei->simdlen > 16)
51849 	clonei->simdlen = 16;
51850     }
51851   return ret;
51852 }
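
/* A worked example of the simdlen computation above: for a clone
   whose BASE_TYPE is double (64 bits) and no explicit simdlen, the
   'b' (128-bit) variant gets simdlen = 128 / 64 = 2 while 'c' and 'd'
   get 256 / 64 = 4; for float, 'c' and 'd' both yield 256 / 32 = 8.
   Exported functions thus get all three mangled variants ("bcd",
   ret == 3), while local functions get the single best ISA.  */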
51853 
51854 /* Add target attribute to SIMD clone NODE if needed.  */
51855 
51856 static void
51857 ix86_simd_clone_adjust (struct cgraph_node *node)
51858 {
51859   const char *str = NULL;
51860   gcc_assert (node->decl == cfun->decl);
51861   switch (node->simdclone->vecsize_mangle)
51862     {
51863     case 'b':
51864       if (!TARGET_SSE2)
51865 	str = "sse2";
51866       break;
51867     case 'c':
51868       if (!TARGET_AVX)
51869 	str = "avx";
51870       break;
51871     case 'd':
51872       if (!TARGET_AVX2)
51873 	str = "avx2";
51874       break;
51875     default:
51876       gcc_unreachable ();
51877     }
51878   if (str == NULL)
51879     return;
51880   push_cfun (NULL);
51881   tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51882   bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51883   gcc_assert (ok);
51884   pop_cfun ();
51885   ix86_reset_previous_fndecl ();
51886   ix86_set_current_function (node->decl);
51887 }
51888 
51889 /* If SIMD clone NODE can't be used in a vectorized loop
51890    in current function, return -1, otherwise return a badness of using it
51891    (0 if it is most desirable from vecsize_mangle point of view, 1
51892    slightly less desirable, etc.).  */
51893 
51894 static int
51895 ix86_simd_clone_usable (struct cgraph_node *node)
51896 {
51897   switch (node->simdclone->vecsize_mangle)
51898     {
51899     case 'b':
51900       if (!TARGET_SSE2)
51901 	return -1;
51902       if (!TARGET_AVX)
51903 	return 0;
51904       return TARGET_AVX2 ? 2 : 1;
51905     case 'c':
51906       if (!TARGET_AVX)
51907 	return -1;
51908       return TARGET_AVX2 ? 1 : 0;
51910     case 'd':
51911       if (!TARGET_AVX2)
51912 	return -1;
51913       return 0;
51914     default:
51915       gcc_unreachable ();
51916     }
51917 }
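
/* E.g. when the current function is compiled with -mavx2, the 'd'
   (AVX2) clone is the best match with badness 0, 'c' (AVX) scores 1
   and 'b' (SSE2) scores 2; with only -mavx, 'd' is unusable (-1) and
   'c' wins with 0.  */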
51918 
51919 /* This function adjusts the unroll factor based on
51920    hardware capabilities.  For example, bdver3 has
51921    a loop buffer which makes unrolling of smaller
51922    loops less important.  The unroll factor is chosen
51923    using the number of memory references in the loop
51924    body (with 32 as the budget) as a heuristic.  */
51925 
51926 static unsigned
51927 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51928 {
51929   basic_block *bbs;
51930   rtx_insn *insn;
51931   unsigned i;
51932   unsigned mem_count = 0;
51933 
51934   if (!TARGET_ADJUST_UNROLL)
51935      return nunroll;
51936 
51937   /* Count the number of memory references within the loop body.
51938      This value determines the unrolling factor for bdver3 and bdver4
51939      architectures. */
51940   subrtx_iterator::array_type array;
51941   bbs = get_loop_body (loop);
51942   for (i = 0; i < loop->num_nodes; i++)
51943     FOR_BB_INSNS (bbs[i], insn)
51944       if (NONDEBUG_INSN_P (insn))
51945 	FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
51946 	  if (const_rtx x = *iter)
51947 	    if (MEM_P (x))
51948 	      {
51949 		machine_mode mode = GET_MODE (x);
51950 		unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
51951 		if (n_words > 4)
51952 		  mem_count += 2;
51953 		else
51954 		  mem_count += 1;
51955 	      }
51956   free (bbs);
51957 
51958   if (mem_count && mem_count <= 32)
51959     return 32 / mem_count;
51960 
51961   return nunroll;
51962 }
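
/* Two examples of the heuristic above: a loop body with 8 counted
   memory references gets an unroll factor of 32 / 8 = 4, while one
   with more than 32 references keeps the caller's NUNROLL unchanged.
   Wide accesses (more than 4 words, e.g. a 512-bit vector load on a
   64-bit target) are counted twice.  */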
51963 
51964 
51965 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P.  */
51966 
51967 static bool
51968 ix86_float_exceptions_rounding_supported_p (void)
51969 {
51970   /* For x87 floating point with standard excess precision handling,
51971      there is no adddf3 pattern (since x87 floating point only has
51972      XFmode operations) so the default hook implementation gets this
51973      wrong.  */
51974   return TARGET_80387 || TARGET_SSE_MATH;
51975 }
51976 
51977 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  */
51978 
51979 static void
51980 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
51981 {
51982   if (!TARGET_80387 && !TARGET_SSE_MATH)
51983     return;
51984   tree exceptions_var = create_tmp_var (integer_type_node);
51985   if (TARGET_80387)
51986     {
51987       tree fenv_index_type = build_index_type (size_int (6));
51988       tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51989       tree fenv_var = create_tmp_var (fenv_type);
51990       mark_addressable (fenv_var);
51991       tree fenv_ptr = build_pointer_type (fenv_type);
51992       tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51993       fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51994       tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51995       tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51996       tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51997       tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51998       tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51999       tree hold_fnclex = build_call_expr (fnclex, 0);
52000       *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
52001 		      hold_fnclex);
52002       *clear = build_call_expr (fnclex, 0);
52003       tree sw_var = create_tmp_var (short_unsigned_type_node);
52004       tree fnstsw_call = build_call_expr (fnstsw, 0);
52005       tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
52006 			    sw_var, fnstsw_call);
52007       tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
52008       tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
52009 				exceptions_var, exceptions_x87);
52010       *update = build2 (COMPOUND_EXPR, integer_type_node,
52011 			sw_mod, update_mod);
52012       tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
52013       *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
52014     }
52015   if (TARGET_SSE_MATH)
52016     {
52017       tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
52018       tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
52019       tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
52020       tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
52021       tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
52022       tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
52023 				      mxcsr_orig_var, stmxcsr_hold_call);
52024       tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
52025 				  mxcsr_orig_var,
52026 				  build_int_cst (unsigned_type_node, 0x1f80));
52027       hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
52028 			     build_int_cst (unsigned_type_node, 0xffffffc0));
52029       tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
52030 				     mxcsr_mod_var, hold_mod_val);
52031       tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
52032       tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
52033 			      hold_assign_orig, hold_assign_mod);
52034       hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
52035 			 ldmxcsr_hold_call);
52036       if (*hold)
52037 	*hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
52038       else
52039 	*hold = hold_all;
52040       tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
52041       if (*clear)
52042 	*clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
52043 			 ldmxcsr_clear_call);
52044       else
52045 	*clear = ldmxcsr_clear_call;
52046       tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
52047       tree exceptions_sse = fold_convert (integer_type_node,
52048 					  stxmcsr_update_call);
52049       if (*update)
52050 	{
52051 	  tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
52052 					exceptions_var, exceptions_sse);
52053 	  tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
52054 					   exceptions_var, exceptions_mod);
52055 	  *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
52056 			    exceptions_assign);
52057 	}
52058       else
52059 	*update = build2 (MODIFY_EXPR, integer_type_node,
52060 			  exceptions_var, exceptions_sse);
52061       tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
52062       *update = build2 (COMPOUND_EXPR, void_type_node, *update,
52063 			ldmxcsr_update_call);
52064     }
52065   tree atomic_feraiseexcept
52066     = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
52067   tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
52068 						    1, exceptions_var);
52069   *update = build2 (COMPOUND_EXPR, void_type_node, *update,
52070 		    atomic_feraiseexcept_call);
52071 }
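
/* To summarize the trees built above: *HOLD saves the x87 environment
   with fnstenv (which as a side effect also masks x87 exceptions) and
   clears pending flags with fnclex; for SSE it saves MXCSR with
   stmxcsr, sets the exception mask bits (| 0x1f80), clears the status
   flags (& 0xffffffc0) and installs the result with ldmxcsr.  *CLEAR
   re-clears pending exceptions.  *UPDATE collects the raised flags
   from the x87 status word (fnstsw) and/or MXCSR (stmxcsr), restores
   the saved environment (fldenv, ldmxcsr) and finally raises the
   collected exceptions via __atomic_feraiseexcept.  */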
52072 
52073 /* Return mode to be used for bounds or VOIDmode
52074    if bounds are not supported.  */
52075 
52076 static enum machine_mode
52077 ix86_mpx_bound_mode ()
52078 {
52079   /* Do not support pointer checker if MPX
52080      is not enabled.  */
52081   if (!TARGET_MPX)
52082     {
52083       if (flag_check_pointer_bounds)
52084 	warning (0, "Pointer Checker requires MPX support on this target."
52085 		 " Use the -mmpx option to enable MPX.");
52086       return VOIDmode;
52087     }
52088 
52089   return BNDmode;
52090 }
52091 
52092 /* Return the constant used to statically initialize constant bounds.
52093 
52094     This function is used to create special bound values.  For now
52095     only INIT bounds and NONE bounds are expected.  More special
52096     values may be added later.  */
52097 
52098 static tree
52099 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
52100 {
52101   tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
52102     : build_zero_cst (pointer_sized_int_node);
52103   tree high = ub ? build_zero_cst (pointer_sized_int_node)
52104     : build_minus_one_cst (pointer_sized_int_node);
52105 
52106   /* This function is supposed to be used to create INIT and
52107      NONE bounds only.  */
52108   gcc_assert ((lb == 0 && ub == -1)
52109 	      || (lb == -1 && ub == 0));
52110 
52111   return build_complex (NULL, low, high);
52112 }
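
/* In other words, because the upper bound is stored in one's
   complement form (compare the BIT_NOT_EXPR applied to UB in
   ix86_initialize_bounds below), INIT bounds (lb == 0, ub == -1,
   covering the whole address space) are encoded as the complex pair
   {0, 0}, and NONE bounds (lb == -1, ub == 0) as {-1, -1}.  */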
52113 
52114 /* Generate a list of statements STMTS to initialize pointer bounds
52115    variable VAR with bounds LB and UB.  Return the number of generated
52116    statements.  */
52117 
52118 static int
52119 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
52120 {
52121   tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
52122   tree lhs, modify, var_p;
52123 
52124   ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
52125   var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
52126 
52127   lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
52128   modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
52129   append_to_statement_list (modify, stmts);
52130 
52131   lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
52132 		build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
52133 			TYPE_SIZE_UNIT (pointer_sized_int_node)));
52134   modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
52135   append_to_statement_list (modify, stmts);
52136 
52137   return 2;
52138 }
52139 
52140 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
52141 /* For i386, a common symbol is local only in non-PIE binaries.  For
52142    x86-64, a common symbol is local only in non-PIE binaries, or when
52143    the linker supports copy relocations in PIE binaries.  */
52144 
52145 static bool
52146 ix86_binds_local_p (const_tree exp)
52147 {
52148   return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
52149 				  (!flag_pic
52150 				   || (TARGET_64BIT
52151 				       && HAVE_LD_PIE_COPYRELOC != 0)));
52152 }
52153 #endif
52154 
52155 /* Initialize the GCC target structure.  */
52156 #undef TARGET_RETURN_IN_MEMORY
52157 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
52158 
52159 #undef TARGET_LEGITIMIZE_ADDRESS
52160 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
52161 
52162 #undef TARGET_ATTRIBUTE_TABLE
52163 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
52164 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
52165 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
52166 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
52167 #  undef TARGET_MERGE_DECL_ATTRIBUTES
52168 #  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
52169 #endif
52170 
52171 #undef TARGET_COMP_TYPE_ATTRIBUTES
52172 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
52173 
52174 #undef TARGET_INIT_BUILTINS
52175 #define TARGET_INIT_BUILTINS ix86_init_builtins
52176 #undef TARGET_BUILTIN_DECL
52177 #define TARGET_BUILTIN_DECL ix86_builtin_decl
52178 #undef TARGET_EXPAND_BUILTIN
52179 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
52180 
52181 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
52182 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
52183   ix86_builtin_vectorized_function
52184 
52185 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
52186 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
52187 
52188 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
52189 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
52190 
52191 #undef TARGET_VECTORIZE_BUILTIN_GATHER
52192 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
52193 
52194 #undef TARGET_BUILTIN_RECIPROCAL
52195 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
52196 
52197 #undef TARGET_ASM_FUNCTION_EPILOGUE
52198 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
52199 
52200 #undef TARGET_ENCODE_SECTION_INFO
52201 #ifndef SUBTARGET_ENCODE_SECTION_INFO
52202 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
52203 #else
52204 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
52205 #endif
52206 
52207 #undef TARGET_ASM_OPEN_PAREN
52208 #define TARGET_ASM_OPEN_PAREN ""
52209 #undef TARGET_ASM_CLOSE_PAREN
52210 #define TARGET_ASM_CLOSE_PAREN ""
52211 
52212 #undef TARGET_ASM_BYTE_OP
52213 #define TARGET_ASM_BYTE_OP ASM_BYTE
52214 
52215 #undef TARGET_ASM_ALIGNED_HI_OP
52216 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
52217 #undef TARGET_ASM_ALIGNED_SI_OP
52218 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
52219 #ifdef ASM_QUAD
52220 #undef TARGET_ASM_ALIGNED_DI_OP
52221 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
52222 #endif
52223 
52224 #undef TARGET_PROFILE_BEFORE_PROLOGUE
52225 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
52226 
52227 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
52228 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
52229 
52230 #undef TARGET_ASM_UNALIGNED_HI_OP
52231 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
52232 #undef TARGET_ASM_UNALIGNED_SI_OP
52233 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
52234 #undef TARGET_ASM_UNALIGNED_DI_OP
52235 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
52236 
52237 #undef TARGET_PRINT_OPERAND
52238 #define TARGET_PRINT_OPERAND ix86_print_operand
52239 #undef TARGET_PRINT_OPERAND_ADDRESS
52240 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
52241 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
52242 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
52243 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
52244 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
52245 
52246 #undef TARGET_SCHED_INIT_GLOBAL
52247 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
52248 #undef TARGET_SCHED_ADJUST_COST
52249 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
52250 #undef TARGET_SCHED_ISSUE_RATE
52251 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
52252 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
52253 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
52254   ia32_multipass_dfa_lookahead
52255 #undef TARGET_SCHED_MACRO_FUSION_P
52256 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
52257 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
52258 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
52259 
52260 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
52261 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
52262 
52263 #undef TARGET_MEMMODEL_CHECK
52264 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
52265 
52266 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
52267 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
52268 
52269 #ifdef HAVE_AS_TLS
52270 #undef TARGET_HAVE_TLS
52271 #define TARGET_HAVE_TLS true
52272 #endif
52273 #undef TARGET_CANNOT_FORCE_CONST_MEM
52274 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
52275 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
52276 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
52277 
52278 #undef TARGET_DELEGITIMIZE_ADDRESS
52279 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
52280 
52281 #undef TARGET_MS_BITFIELD_LAYOUT_P
52282 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
52283 
52284 #if TARGET_MACHO
52285 #undef TARGET_BINDS_LOCAL_P
52286 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
52287 #else
52288 #undef TARGET_BINDS_LOCAL_P
52289 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
52290 #endif
52291 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
52292 #undef TARGET_BINDS_LOCAL_P
52293 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
52294 #endif
52295 
52296 #undef TARGET_ASM_OUTPUT_MI_THUNK
52297 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
52298 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
52299 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
52300 
52301 #undef TARGET_ASM_FILE_START
52302 #define TARGET_ASM_FILE_START x86_file_start
52303 
52304 #undef TARGET_OPTION_OVERRIDE
52305 #define TARGET_OPTION_OVERRIDE ix86_option_override
52306 
52307 #undef TARGET_REGISTER_MOVE_COST
52308 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
52309 #undef TARGET_MEMORY_MOVE_COST
52310 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
52311 #undef TARGET_RTX_COSTS
52312 #define TARGET_RTX_COSTS ix86_rtx_costs
52313 #undef TARGET_ADDRESS_COST
52314 #define TARGET_ADDRESS_COST ix86_address_cost
52315 
52316 #undef TARGET_FIXED_CONDITION_CODE_REGS
52317 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
52318 #undef TARGET_CC_MODES_COMPATIBLE
52319 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
52320 
52321 #undef TARGET_MACHINE_DEPENDENT_REORG
52322 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
52323 
52324 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
52325 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
52326 
52327 #undef TARGET_BUILD_BUILTIN_VA_LIST
52328 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
52329 
52330 #undef TARGET_FOLD_BUILTIN
52331 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
52332 
52333 #undef TARGET_COMPARE_VERSION_PRIORITY
52334 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
52335 
52336 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
52337 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
52338   ix86_generate_version_dispatcher_body
52339 
52340 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
52341 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
52342   ix86_get_function_versions_dispatcher
52343 
52344 #undef TARGET_ENUM_VA_LIST_P
52345 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
52346 
52347 #undef TARGET_FN_ABI_VA_LIST
52348 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
52349 
52350 #undef TARGET_CANONICAL_VA_LIST_TYPE
52351 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
52352 
52353 #undef TARGET_EXPAND_BUILTIN_VA_START
52354 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
52355 
52356 #undef TARGET_MD_ASM_CLOBBERS
52357 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
52358 
52359 #undef TARGET_PROMOTE_PROTOTYPES
52360 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
52361 #undef TARGET_SETUP_INCOMING_VARARGS
52362 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
52363 #undef TARGET_MUST_PASS_IN_STACK
52364 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
52365 #undef TARGET_FUNCTION_ARG_ADVANCE
52366 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
52367 #undef TARGET_FUNCTION_ARG
52368 #define TARGET_FUNCTION_ARG ix86_function_arg
52369 #undef TARGET_INIT_PIC_REG
52370 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
52371 #undef TARGET_USE_PSEUDO_PIC_REG
52372 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
52373 #undef TARGET_FUNCTION_ARG_BOUNDARY
52374 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
52375 #undef TARGET_PASS_BY_REFERENCE
52376 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
52377 #undef TARGET_INTERNAL_ARG_POINTER
52378 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
52379 #undef TARGET_UPDATE_STACK_BOUNDARY
52380 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
52381 #undef TARGET_GET_DRAP_RTX
52382 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
52383 #undef TARGET_STRICT_ARGUMENT_NAMING
52384 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
52385 #undef TARGET_STATIC_CHAIN
52386 #define TARGET_STATIC_CHAIN ix86_static_chain
52387 #undef TARGET_TRAMPOLINE_INIT
52388 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
52389 #undef TARGET_RETURN_POPS_ARGS
52390 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
52391 
52392 #undef TARGET_LEGITIMATE_COMBINED_INSN
52393 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
52394 
52395 #undef TARGET_ASAN_SHADOW_OFFSET
52396 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
52397 
52398 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
52399 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
52400 
52401 #undef TARGET_SCALAR_MODE_SUPPORTED_P
52402 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
52403 
52404 #undef TARGET_VECTOR_MODE_SUPPORTED_P
52405 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
52406 
52407 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
52408 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
52409   ix86_libgcc_floating_mode_supported_p
52410 
52411 #undef TARGET_C_MODE_FOR_SUFFIX
52412 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
52413 
52414 #ifdef HAVE_AS_TLS
52415 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
52416 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
52417 #endif
52418 
52419 #ifdef SUBTARGET_INSERT_ATTRIBUTES
52420 #undef TARGET_INSERT_ATTRIBUTES
52421 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
52422 #endif
52423 
52424 #undef TARGET_MANGLE_TYPE
52425 #define TARGET_MANGLE_TYPE ix86_mangle_type
52426 
52427 #if !TARGET_MACHO
52428 #undef TARGET_STACK_PROTECT_FAIL
52429 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
52430 #endif
52431 
52432 #undef TARGET_FUNCTION_VALUE
52433 #define TARGET_FUNCTION_VALUE ix86_function_value
52434 
52435 #undef TARGET_FUNCTION_VALUE_REGNO_P
52436 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
52437 
52438 #undef TARGET_PROMOTE_FUNCTION_MODE
52439 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
52440 
52441 #undef TARGET_MEMBER_TYPE_FORCES_BLK
52442 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
52443 
52444 #undef TARGET_INSTANTIATE_DECLS
52445 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
52446 
52447 #undef TARGET_SECONDARY_RELOAD
52448 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
52449 
52450 #undef TARGET_CLASS_MAX_NREGS
52451 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
52452 
52453 #undef TARGET_PREFERRED_RELOAD_CLASS
52454 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
52455 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
52456 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
52457 #undef TARGET_CLASS_LIKELY_SPILLED_P
52458 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
52459 
52460 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
52461 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
52462   ix86_builtin_vectorization_cost
52463 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
52464 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
52465   ix86_vectorize_vec_perm_const_ok
52466 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
52467 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
52468   ix86_preferred_simd_mode
52469 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
52470 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
52471   ix86_autovectorize_vector_sizes
52472 #undef TARGET_VECTORIZE_INIT_COST
52473 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
52474 #undef TARGET_VECTORIZE_ADD_STMT_COST
52475 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
52476 #undef TARGET_VECTORIZE_FINISH_COST
52477 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
52478 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
52479 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
52480 
52481 #undef TARGET_SET_CURRENT_FUNCTION
52482 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
52483 
52484 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
52485 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
52486 
52487 #undef TARGET_OPTION_SAVE
52488 #define TARGET_OPTION_SAVE ix86_function_specific_save
52489 
52490 #undef TARGET_OPTION_RESTORE
52491 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
52492 
52493 #undef TARGET_OPTION_POST_STREAM_IN
52494 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
52495 
52496 #undef TARGET_OPTION_PRINT
52497 #define TARGET_OPTION_PRINT ix86_function_specific_print
52498 
52499 #undef TARGET_OPTION_FUNCTION_VERSIONS
52500 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
52501 
52502 #undef TARGET_CAN_INLINE_P
52503 #define TARGET_CAN_INLINE_P ix86_can_inline_p
52504 
52505 #undef TARGET_LEGITIMATE_ADDRESS_P
52506 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
52507 
52508 #undef TARGET_LRA_P
52509 #define TARGET_LRA_P hook_bool_void_true
52510 
52511 #undef TARGET_REGISTER_PRIORITY
52512 #define TARGET_REGISTER_PRIORITY ix86_register_priority
52513 
52514 #undef TARGET_REGISTER_USAGE_LEVELING_P
52515 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
52516 
52517 #undef TARGET_LEGITIMATE_CONSTANT_P
52518 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
52519 
52520 #undef TARGET_FRAME_POINTER_REQUIRED
52521 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
52522 
52523 #undef TARGET_CAN_ELIMINATE
52524 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
52525 
52526 #undef TARGET_EXTRA_LIVE_ON_ENTRY
52527 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
52528 
52529 #undef TARGET_ASM_CODE_END
52530 #define TARGET_ASM_CODE_END ix86_code_end
52531 
52532 #undef TARGET_CONDITIONAL_REGISTER_USAGE
52533 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
52534 
52535 #if TARGET_MACHO
52536 #undef TARGET_INIT_LIBFUNCS
52537 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
52538 #endif
52539 
52540 #undef TARGET_LOOP_UNROLL_ADJUST
52541 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
52542 
52543 #undef TARGET_SPILL_CLASS
52544 #define TARGET_SPILL_CLASS ix86_spill_class
52545 
52546 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
52547 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
52548   ix86_simd_clone_compute_vecsize_and_simdlen
52549 
52550 #undef TARGET_SIMD_CLONE_ADJUST
52551 #define TARGET_SIMD_CLONE_ADJUST \
52552   ix86_simd_clone_adjust
52553 
52554 #undef TARGET_SIMD_CLONE_USABLE
52555 #define TARGET_SIMD_CLONE_USABLE \
52556   ix86_simd_clone_usable
52557 
52558 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
52559 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
52560   ix86_float_exceptions_rounding_supported_p
52561 
52562 #undef TARGET_MODE_EMIT
52563 #define TARGET_MODE_EMIT ix86_emit_mode_set
52564 
52565 #undef TARGET_MODE_NEEDED
52566 #define TARGET_MODE_NEEDED ix86_mode_needed
52567 
52568 #undef TARGET_MODE_AFTER
52569 #define TARGET_MODE_AFTER ix86_mode_after
52570 
52571 #undef TARGET_MODE_ENTRY
52572 #define TARGET_MODE_ENTRY ix86_mode_entry
52573 
52574 #undef TARGET_MODE_EXIT
52575 #define TARGET_MODE_EXIT ix86_mode_exit
52576 
52577 #undef TARGET_MODE_PRIORITY
52578 #define TARGET_MODE_PRIORITY ix86_mode_priority
52579 
52580 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
52581 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
52582 
52583 #undef TARGET_LOAD_BOUNDS_FOR_ARG
52584 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
52585 
52586 #undef TARGET_STORE_BOUNDS_FOR_ARG
52587 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
52588 
52589 #undef TARGET_LOAD_RETURNED_BOUNDS
52590 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
52591 
52592 #undef TARGET_STORE_RETURNED_BOUNDS
52593 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
52594 
52595 #undef TARGET_CHKP_BOUND_MODE
52596 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
52597 
52598 #undef TARGET_BUILTIN_CHKP_FUNCTION
52599 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
52600 
52601 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
52602 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
52603 
52604 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
52605 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
52606 
52607 #undef TARGET_CHKP_INITIALIZE_BOUNDS
52608 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
52609 
52610 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
52611 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
52612 
52613 #undef TARGET_OFFLOAD_OPTIONS
52614 #define TARGET_OFFLOAD_OPTIONS \
52615   ix86_offload_options
52616 
52617 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
52618 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
52619 
52620 struct gcc_target targetm = TARGET_INITIALIZER;
52621 
52622 #include "gt-i386.h"
52623