xref: /llvm-project/llvm/test/CodeGen/X86/fma.ll (revision 5c37316b54ae763b3dacb6f1e8e1e94348ab4512)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i386-apple-darwin10  -mattr=+avx,+fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=FMA32
3; RUN: llc < %s -mtriple=i386-apple-darwin10  -mattr=+avx,-fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=FMACALL32
4; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=+fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=FMA64
5; RUN: llc < %s -mtriple=x86_64-apple-darwin10  -mattr=-fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=FMACALL64
6; RUN: llc < %s -mtriple=x86_64-apple-darwin10  -mattr=+avx512f,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=AVX512
7; RUN: llc < %s -mtriple=x86_64-apple-darwin10  -mattr=+avx512vl,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=AVX512VL
8; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=bdver2 -mattr=-fma4 -show-mc-encoding | FileCheck %s --check-prefix=FMA32
9; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=bdver2 -mattr=-fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefixes=FMACALL32,FMACALL32_BDVER2
10
; Scalar f32 case: calls @llvm.fma.f32 on the three float arguments and
; returns the result.
; Expected codegen per the checks below:
;   - FMA32 / FMA64 / AVX512 / AVX512VL prefixes: a single vfmadd213ss (the
;     i386 variant loads the operands from the stack and returns via flds).
;   - FMACALL32 / FMACALL64 prefixes: a tail-call jmp to _fmaf.
11define float @test_f32(float %a, float %b, float %c) #0 {
12; FMA32-LABEL: test_f32:
13; FMA32:       ## %bb.0:
14; FMA32-NEXT:    pushl %eax ## encoding: [0x50]
15; FMA32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
16; FMA32-NEXT:    ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08]
17; FMA32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
18; FMA32-NEXT:    ## encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c]
19; FMA32-NEXT:    vfmadd213ss {{[0-9]+}}(%esp), %xmm0, %xmm1 ## encoding: [0xc4,0xe2,0x79,0xa9,0x4c,0x24,0x10]
20; FMA32-NEXT:    ## xmm1 = (xmm0 * xmm1) + mem
21; FMA32-NEXT:    vmovss %xmm1, (%esp) ## encoding: [0xc5,0xfa,0x11,0x0c,0x24]
22; FMA32-NEXT:    flds (%esp) ## encoding: [0xd9,0x04,0x24]
23; FMA32-NEXT:    popl %eax ## encoding: [0x58]
24; FMA32-NEXT:    retl ## encoding: [0xc3]
25;
26; FMACALL32-LABEL: test_f32:
27; FMACALL32:       ## %bb.0:
28; FMACALL32-NEXT:    jmp _fmaf ## TAILCALL
29; FMACALL32-NEXT:    ## encoding: [0xeb,A]
30; FMACALL32-NEXT:    ## fixup A - offset: 1, value: _fmaf-1, kind: FK_PCRel_1
31;
32; FMA64-LABEL: test_f32:
33; FMA64:       ## %bb.0:
34; FMA64-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
35; FMA64-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
36; FMA64-NEXT:    retq ## encoding: [0xc3]
37;
38; FMACALL64-LABEL: test_f32:
39; FMACALL64:       ## %bb.0:
40; FMACALL64-NEXT:    jmp _fmaf ## TAILCALL
41; FMACALL64-NEXT:    ## encoding: [0xeb,A]
42; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-1, kind: FK_PCRel_1
43;
44; AVX512-LABEL: test_f32:
45; AVX512:       ## %bb.0:
46; AVX512-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
47; AVX512-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
48; AVX512-NEXT:    retq ## encoding: [0xc3]
49;
50; AVX512VL-LABEL: test_f32:
51; AVX512VL:       ## %bb.0:
52; AVX512VL-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
53; AVX512VL-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
54; AVX512VL-NEXT:    retq ## encoding: [0xc3]
55  %call = call float @llvm.fma.f32(float %a, float %b, float %c)
56  ret float %call
57}
58
; Same as test_f32, but the @llvm.fma.f32 call carries the `reassoc`
; fast-math flag.
; Expected codegen per the checks below:
;   - FMA32 / FMA64 / AVX512 / AVX512VL prefixes: still a single vfmadd213ss.
;   - FMACALL32 / FMACALL64 prefixes: no call to _fmaf; instead a separate
;     multiply followed by an add (vmulss + vaddss on i386, mulss + addss on
;     x86_64).
59define float @test_f32_reassoc(float %a, float %b, float %c) #0 {
60; FMA32-LABEL: test_f32_reassoc:
61; FMA32:       ## %bb.0:
62; FMA32-NEXT:    pushl %eax ## encoding: [0x50]
63; FMA32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
64; FMA32-NEXT:    ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08]
65; FMA32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
66; FMA32-NEXT:    ## encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c]
67; FMA32-NEXT:    vfmadd213ss {{[0-9]+}}(%esp), %xmm0, %xmm1 ## encoding: [0xc4,0xe2,0x79,0xa9,0x4c,0x24,0x10]
68; FMA32-NEXT:    ## xmm1 = (xmm0 * xmm1) + mem
69; FMA32-NEXT:    vmovss %xmm1, (%esp) ## encoding: [0xc5,0xfa,0x11,0x0c,0x24]
70; FMA32-NEXT:    flds (%esp) ## encoding: [0xd9,0x04,0x24]
71; FMA32-NEXT:    popl %eax ## encoding: [0x58]
72; FMA32-NEXT:    retl ## encoding: [0xc3]
73;
74; FMACALL32-LABEL: test_f32_reassoc:
75; FMACALL32:       ## %bb.0:
76; FMACALL32-NEXT:    pushl %eax ## encoding: [0x50]
77; FMACALL32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
78; FMACALL32-NEXT:    ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08]
79; FMACALL32-NEXT:    vmulss {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x59,0x44,0x24,0x0c]
80; FMACALL32-NEXT:    vaddss {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x58,0x44,0x24,0x10]
81; FMACALL32-NEXT:    vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24]
82; FMACALL32-NEXT:    flds (%esp) ## encoding: [0xd9,0x04,0x24]
83; FMACALL32-NEXT:    popl %eax ## encoding: [0x58]
84; FMACALL32-NEXT:    retl ## encoding: [0xc3]
85;
86; FMA64-LABEL: test_f32_reassoc:
87; FMA64:       ## %bb.0:
88; FMA64-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
89; FMA64-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
90; FMA64-NEXT:    retq ## encoding: [0xc3]
91;
92; FMACALL64-LABEL: test_f32_reassoc:
93; FMACALL64:       ## %bb.0:
94; FMACALL64-NEXT:    mulss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x59,0xc1]
95; FMACALL64-NEXT:    addss %xmm2, %xmm0 ## encoding: [0xf3,0x0f,0x58,0xc2]
96; FMACALL64-NEXT:    retq ## encoding: [0xc3]
97;
98; AVX512-LABEL: test_f32_reassoc:
99; AVX512:       ## %bb.0:
100; AVX512-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
101; AVX512-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
102; AVX512-NEXT:    retq ## encoding: [0xc3]
103;
104; AVX512VL-LABEL: test_f32_reassoc:
105; AVX512VL:       ## %bb.0:
106; AVX512VL-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
107; AVX512VL-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
108; AVX512VL-NEXT:    retq ## encoding: [0xc3]
109  %call = call reassoc float @llvm.fma.f32(float %a, float %b, float %c)
110  ret float %call
111}
112
; Scalar f64 case: calls @llvm.fma.f64 on the three double arguments and
; returns the result.
; Expected codegen per the checks below:
;   - FMA32 / FMA64 / AVX512 / AVX512VL prefixes: a single vfmadd213sd (the
;     i386 variant loads the operands from the stack and returns via fldl).
;   - FMACALL32 / FMACALL64 prefixes: a tail-call jmp to _fma.
113define double @test_f64(double %a, double %b, double %c) #0 {
114; FMA32-LABEL: test_f64:
115; FMA32:       ## %bb.0: ## %entry
116; FMA32-NEXT:    subl $12, %esp ## encoding: [0x83,0xec,0x0c]
117; FMA32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
118; FMA32-NEXT:    ## encoding: [0xc5,0xfb,0x10,0x44,0x24,0x10]
119; FMA32-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
120; FMA32-NEXT:    ## encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x18]
121; FMA32-NEXT:    vfmadd213sd {{[0-9]+}}(%esp), %xmm0, %xmm1 ## encoding: [0xc4,0xe2,0xf9,0xa9,0x4c,0x24,0x20]
122; FMA32-NEXT:    ## xmm1 = (xmm0 * xmm1) + mem
123; FMA32-NEXT:    vmovsd %xmm1, (%esp) ## encoding: [0xc5,0xfb,0x11,0x0c,0x24]
124; FMA32-NEXT:    fldl (%esp) ## encoding: [0xdd,0x04,0x24]
125; FMA32-NEXT:    addl $12, %esp ## encoding: [0x83,0xc4,0x0c]
126; FMA32-NEXT:    retl ## encoding: [0xc3]
127;
128; FMACALL32-LABEL: test_f64:
129; FMACALL32:       ## %bb.0: ## %entry
130; FMACALL32-NEXT:    jmp _fma ## TAILCALL
131; FMACALL32-NEXT:    ## encoding: [0xeb,A]
132; FMACALL32-NEXT:    ## fixup A - offset: 1, value: _fma-1, kind: FK_PCRel_1
133;
134; FMA64-LABEL: test_f64:
135; FMA64:       ## %bb.0: ## %entry
136; FMA64-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
137; FMA64-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
138; FMA64-NEXT:    retq ## encoding: [0xc3]
139;
140; FMACALL64-LABEL: test_f64:
141; FMACALL64:       ## %bb.0: ## %entry
142; FMACALL64-NEXT:    jmp _fma ## TAILCALL
143; FMACALL64-NEXT:    ## encoding: [0xeb,A]
144; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-1, kind: FK_PCRel_1
145;
146; AVX512-LABEL: test_f64:
147; AVX512:       ## %bb.0: ## %entry
148; AVX512-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
149; AVX512-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
150; AVX512-NEXT:    retq ## encoding: [0xc3]
151;
152; AVX512VL-LABEL: test_f64:
153; AVX512VL:       ## %bb.0: ## %entry
154; AVX512VL-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
155; AVX512VL-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
156; AVX512VL-NEXT:    retq ## encoding: [0xc3]
157entry:
158  %call = call double @llvm.fma.f64(double %a, double %b, double %c)
159  ret double %call
160}
161
; x86_fp80 case: calls @llvm.fma.f80 on the three x86_fp80 arguments and
; returns the result.
; Expected codegen per the checks below: every prefix, whether or not the FMA
; feature is enabled, expects the same shape -- the three operands are loaded
; with fldt, stored to the outgoing argument area with fstpt, and _fmal is
; called (calll on i386, callq on x86_64). No vfmadd instruction is expected.
162define x86_fp80 @test_f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c) #0 {
163; FMA32-LABEL: test_f80:
164; FMA32:       ## %bb.0: ## %entry
165; FMA32-NEXT:    subl $60, %esp ## encoding: [0x83,0xec,0x3c]
166; FMA32-NEXT:    fldt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x6c,0x24,0x40]
167; FMA32-NEXT:    fldt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x6c,0x24,0x50]
168; FMA32-NEXT:    fldt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x6c,0x24,0x60]
169; FMA32-NEXT:    fstpt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x7c,0x24,0x20]
170; FMA32-NEXT:    fstpt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x7c,0x24,0x10]
171; FMA32-NEXT:    fstpt (%esp) ## encoding: [0xdb,0x3c,0x24]
172; FMA32-NEXT:    calll _fmal ## encoding: [0xe8,A,A,A,A]
173; FMA32-NEXT:    ## fixup A - offset: 1, value: _fmal-4, kind: FK_PCRel_4
174; FMA32-NEXT:    addl $60, %esp ## encoding: [0x83,0xc4,0x3c]
175; FMA32-NEXT:    retl ## encoding: [0xc3]
176;
177; FMACALL32-LABEL: test_f80:
178; FMACALL32:       ## %bb.0: ## %entry
179; FMACALL32-NEXT:    subl $60, %esp ## encoding: [0x83,0xec,0x3c]
180; FMACALL32-NEXT:    fldt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x6c,0x24,0x40]
181; FMACALL32-NEXT:    fldt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x6c,0x24,0x50]
182; FMACALL32-NEXT:    fldt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x6c,0x24,0x60]
183; FMACALL32-NEXT:    fstpt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x7c,0x24,0x20]
184; FMACALL32-NEXT:    fstpt {{[0-9]+}}(%esp) ## encoding: [0xdb,0x7c,0x24,0x10]
185; FMACALL32-NEXT:    fstpt (%esp) ## encoding: [0xdb,0x3c,0x24]
186; FMACALL32-NEXT:    calll _fmal ## encoding: [0xe8,A,A,A,A]
187; FMACALL32-NEXT:    ## fixup A - offset: 1, value: _fmal-4, kind: FK_PCRel_4
188; FMACALL32-NEXT:    addl $60, %esp ## encoding: [0x83,0xc4,0x3c]
189; FMACALL32-NEXT:    retl ## encoding: [0xc3]
190;
191; FMA64-LABEL: test_f80:
192; FMA64:       ## %bb.0: ## %entry
193; FMA64-NEXT:    subq $56, %rsp ## encoding: [0x48,0x83,0xec,0x38]
194; FMA64-NEXT:    fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x40]
195; FMA64-NEXT:    fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x50]
196; FMA64-NEXT:    fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x60]
197; FMA64-NEXT:    fstpt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x7c,0x24,0x20]
198; FMA64-NEXT:    fstpt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x7c,0x24,0x10]
199; FMA64-NEXT:    fstpt (%rsp) ## encoding: [0xdb,0x3c,0x24]
200; FMA64-NEXT:    callq _fmal ## encoding: [0xe8,A,A,A,A]
201; FMA64-NEXT:    ## fixup A - offset: 1, value: _fmal-4, kind: reloc_branch_4byte_pcrel
202; FMA64-NEXT:    addq $56, %rsp ## encoding: [0x48,0x83,0xc4,0x38]
203; FMA64-NEXT:    retq ## encoding: [0xc3]
204;
205; FMACALL64-LABEL: test_f80:
206; FMACALL64:       ## %bb.0: ## %entry
207; FMACALL64-NEXT:    subq $56, %rsp ## encoding: [0x48,0x83,0xec,0x38]
208; FMACALL64-NEXT:    fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x40]
209; FMACALL64-NEXT:    fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x50]
210; FMACALL64-NEXT:    fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x60]
211; FMACALL64-NEXT:    fstpt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x7c,0x24,0x20]
212; FMACALL64-NEXT:    fstpt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x7c,0x24,0x10]
213; FMACALL64-NEXT:    fstpt (%rsp) ## encoding: [0xdb,0x3c,0x24]
214; FMACALL64-NEXT:    callq _fmal ## encoding: [0xe8,A,A,A,A]
215; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmal-4, kind: reloc_branch_4byte_pcrel
216; FMACALL64-NEXT:    addq $56, %rsp ## encoding: [0x48,0x83,0xc4,0x38]
217; FMACALL64-NEXT:    retq ## encoding: [0xc3]
218;
219; AVX512-LABEL: test_f80:
220; AVX512:       ## %bb.0: ## %entry
221; AVX512-NEXT:    subq $56, %rsp ## encoding: [0x48,0x83,0xec,0x38]
222; AVX512-NEXT:    fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x40]
223; AVX512-NEXT:    fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x50]
224; AVX512-NEXT:    fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x60]
225; AVX512-NEXT:    fstpt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x7c,0x24,0x20]
226; AVX512-NEXT:    fstpt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x7c,0x24,0x10]
227; AVX512-NEXT:    fstpt (%rsp) ## encoding: [0xdb,0x3c,0x24]
228; AVX512-NEXT:    callq _fmal ## encoding: [0xe8,A,A,A,A]
229; AVX512-NEXT:    ## fixup A - offset: 1, value: _fmal-4, kind: reloc_branch_4byte_pcrel
230; AVX512-NEXT:    addq $56, %rsp ## encoding: [0x48,0x83,0xc4,0x38]
231; AVX512-NEXT:    retq ## encoding: [0xc3]
232;
233; AVX512VL-LABEL: test_f80:
234; AVX512VL:       ## %bb.0: ## %entry
235; AVX512VL-NEXT:    subq $56, %rsp ## encoding: [0x48,0x83,0xec,0x38]
236; AVX512VL-NEXT:    fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x40]
237; AVX512VL-NEXT:    fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x50]
238; AVX512VL-NEXT:    fldt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x6c,0x24,0x60]
239; AVX512VL-NEXT:    fstpt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x7c,0x24,0x20]
240; AVX512VL-NEXT:    fstpt {{[0-9]+}}(%rsp) ## encoding: [0xdb,0x7c,0x24,0x10]
241; AVX512VL-NEXT:    fstpt (%rsp) ## encoding: [0xdb,0x3c,0x24]
242; AVX512VL-NEXT:    callq _fmal ## encoding: [0xe8,A,A,A,A]
243; AVX512VL-NEXT:    ## fixup A - offset: 1, value: _fmal-4, kind: reloc_branch_4byte_pcrel
244; AVX512VL-NEXT:    addq $56, %rsp ## encoding: [0x48,0x83,0xc4,0x38]
245; AVX512VL-NEXT:    retq ## encoding: [0xc3]
246entry:
247  %call = call x86_fp80 @llvm.fma.f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c)
248  ret x86_fp80 %call
249}
250
; Constant-operand case: calls @llvm.fma.f32 with all three operands equal to
; the literal 3.0 (3*3+3 = 12).
; Expected codegen per the checks below: no fma instruction and no libcall
; under any prefix -- only a load of a constant-pool entry (flds on i386;
; movss/vmovss on x86_64, where the asm comment shows the value 1.2E+1).
251define float @test_f32_cst() #0 {
252; FMA32-LABEL: test_f32_cst:
253; FMA32:       ## %bb.0: ## %entry
254; FMA32-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}} ## encoding: [0xd9,0x05,A,A,A,A]
255; FMA32-NEXT:    ## fixup A - offset: 2, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
256; FMA32-NEXT:    retl ## encoding: [0xc3]
257;
258; FMACALL32-LABEL: test_f32_cst:
259; FMACALL32:       ## %bb.0: ## %entry
260; FMACALL32-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}} ## encoding: [0xd9,0x05,A,A,A,A]
261; FMACALL32-NEXT:    ## fixup A - offset: 2, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
262; FMACALL32-NEXT:    retl ## encoding: [0xc3]
263;
264; FMA64-LABEL: test_f32_cst:
265; FMA64:       ## %bb.0: ## %entry
266; FMA64-NEXT:    vmovss {{.*#+}} xmm0 = [1.2E+1,0.0E+0,0.0E+0,0.0E+0]
267; FMA64-NEXT:    ## encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A]
268; FMA64-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
269; FMA64-NEXT:    retq ## encoding: [0xc3]
270;
271; FMACALL64-LABEL: test_f32_cst:
272; FMACALL64:       ## %bb.0: ## %entry
273; FMACALL64-NEXT:    movss {{.*#+}} xmm0 = [1.2E+1,0.0E+0,0.0E+0,0.0E+0]
274; FMACALL64-NEXT:    ## encoding: [0xf3,0x0f,0x10,0x05,A,A,A,A]
275; FMACALL64-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
276; FMACALL64-NEXT:    retq ## encoding: [0xc3]
277;
278; AVX512-LABEL: test_f32_cst:
279; AVX512:       ## %bb.0: ## %entry
280; AVX512-NEXT:    vmovss {{.*#+}} xmm0 = [1.2E+1,0.0E+0,0.0E+0,0.0E+0]
281; AVX512-NEXT:    ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A]
282; AVX512-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
283; AVX512-NEXT:    retq ## encoding: [0xc3]
284;
285; AVX512VL-LABEL: test_f32_cst:
286; AVX512VL:       ## %bb.0: ## %entry
287; AVX512VL-NEXT:    vmovss {{.*#+}} xmm0 = [1.2E+1,0.0E+0,0.0E+0,0.0E+0]
288; AVX512VL-NEXT:    ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A]
289; AVX512VL-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
290; AVX512VL-NEXT:    retq ## encoding: [0xc3]
291entry:
292  %call = call float @llvm.fma.f32(float 3.0, float 3.0, float 3.0)
293  ret float %call
294}
295
; 128-bit vector case: calls @llvm.fma.v4f32 on three <4 x float> arguments
; and returns the result.
; Expected codegen per the checks below:
;   - FMA32 / FMA64 / AVX512 / AVX512VL prefixes: a single vfmadd213ps on xmm
;     registers.
;   - FMACALL64 prefix: the vector is scalarized into four callq _fmaf calls;
;     the inputs are spilled, each lane is extracted with shufps/movhlps, and
;     the results are recombined with unpcklps/unpcklpd.
;   - FMACALL32_BDVER2 prefix: four calll _fmaf calls; lanes are passed on the
;     stack via vextractps/vmovss, results come back on the x87 stack
;     (fstpt/fldt/fstps) and are reassembled with vmovss + vinsertps.
; No checks under the plain FMACALL32 prefix appear for this function.
296define <4 x float> @test_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
297; FMA32-LABEL: test_v4f32:
298; FMA32:       ## %bb.0: ## %entry
299; FMA32-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
300; FMA32-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
301; FMA32-NEXT:    retl ## encoding: [0xc3]
302;
303; FMA64-LABEL: test_v4f32:
304; FMA64:       ## %bb.0: ## %entry
305; FMA64-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
306; FMA64-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
307; FMA64-NEXT:    retq ## encoding: [0xc3]
308;
309; FMACALL64-LABEL: test_v4f32:
310; FMACALL64:       ## %bb.0: ## %entry
311; FMACALL64-NEXT:    subq $88, %rsp ## encoding: [0x48,0x83,0xec,0x58]
312; FMACALL64-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
313; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x54,0x24,0x30]
314; FMACALL64-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
315; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x4c,0x24,0x10]
316; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
317; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x20]
318; FMACALL64-NEXT:    shufps $255, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0xff]
319; FMACALL64-NEXT:    ## xmm0 = xmm0[3,3,3,3]
320; FMACALL64-NEXT:    shufps $255, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0xff]
321; FMACALL64-NEXT:    ## xmm1 = xmm1[3,3,3,3]
322; FMACALL64-NEXT:    shufps $255, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0xff]
323; FMACALL64-NEXT:    ## xmm2 = xmm2[3,3,3,3]
324; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
325; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
326; FMACALL64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
327; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x04,0x24]
328; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
329; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x20]
330; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
331; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
332; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
333; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
334; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
335; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
336; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
337; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x54,0x24,0x30]
338; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
339; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
340; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
341; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
342; FMACALL64-NEXT:    unpcklps (%rsp), %xmm0 ## 16-byte Folded Reload
343; FMACALL64-NEXT:    ## encoding: [0x0f,0x14,0x04,0x24]
344; FMACALL64-NEXT:    ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
345; FMACALL64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
346; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x04,0x24]
347; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
348; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x20]
349; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
350; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
351; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
352; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x54,0x24,0x30]
353; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
354; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
355; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
356; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x40]
357; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
358; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x20]
359; FMACALL64-NEXT:    shufps $85, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0x55]
360; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1,1,1]
361; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
362; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
363; FMACALL64-NEXT:    shufps $85, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0x55]
364; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1,1,1]
365; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
366; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x54,0x24,0x30]
367; FMACALL64-NEXT:    shufps $85, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0x55]
368; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1,1,1]
369; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
370; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
371; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
372; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x40]
373; FMACALL64-NEXT:    unpcklps %xmm0, %xmm1 ## encoding: [0x0f,0x14,0xc8]
374; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
375; FMACALL64-NEXT:    unpcklpd (%rsp), %xmm1 ## 16-byte Folded Reload
376; FMACALL64-NEXT:    ## encoding: [0x66,0x0f,0x14,0x0c,0x24]
377; FMACALL64-NEXT:    ## xmm1 = xmm1[0],mem[0]
378; FMACALL64-NEXT:    movaps %xmm1, %xmm0 ## encoding: [0x0f,0x28,0xc1]
379; FMACALL64-NEXT:    addq $88, %rsp ## encoding: [0x48,0x83,0xc4,0x58]
380; FMACALL64-NEXT:    retq ## encoding: [0xc3]
381;
382; AVX512-LABEL: test_v4f32:
383; AVX512:       ## %bb.0: ## %entry
384; AVX512-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
385; AVX512-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
386; AVX512-NEXT:    retq ## encoding: [0xc3]
387;
388; AVX512VL-LABEL: test_v4f32:
389; AVX512VL:       ## %bb.0: ## %entry
390; AVX512VL-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
391; AVX512VL-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
392; AVX512VL-NEXT:    retq ## encoding: [0xc3]
393;
394; FMACALL32_BDVER2-LABEL: test_v4f32:
395; FMACALL32_BDVER2:       ## %bb.0: ## %entry
396; FMACALL32_BDVER2-NEXT:    subl $108, %esp ## encoding: [0x83,0xec,0x6c]
397; FMACALL32_BDVER2-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
398; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x54,0x24,0x40]
399; FMACALL32_BDVER2-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
400; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x4c,0x24,0x30]
401; FMACALL32_BDVER2-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
402; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x20]
403; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x08,0x02]
404; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm1, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x4c,0x24,0x04,0x02]
405; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x02]
406; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
407; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
408; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
409; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x7c,0x24,0x60]
410; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
411; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40]
412; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload
413; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x54,0x24,0x30]
414; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
415; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x4c,0x24,0x20]
416; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01]
417; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x01]
418; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x01]
419; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
420; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
421; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
422; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x7c,0x24,0x54]
423; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
424; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40]
425; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08]
426; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
427; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x30]
428; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04]
429; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
430; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x20]
431; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24]
432; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
433; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
434; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
435; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40]
436; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload
437; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x54,0x24,0x30]
438; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
439; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x4c,0x24,0x20]
440; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03]
441; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x03]
442; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x03]
443; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x1c]
444; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
445; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x6c,0x24,0x54]
446; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x18]
447; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
448; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x6c,0x24,0x60]
449; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x14]
450; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
451; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
452; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x10]
453; FMACALL32_BDVER2-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
454; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x1c]
455; FMACALL32_BDVER2-NEXT:    vinsertps $16, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x18,0x10]
456; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0],mem[0],xmm0[2,3]
457; FMACALL32_BDVER2-NEXT:    vinsertps $32, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x14,0x20]
458; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0,1],mem[0],xmm0[3]
459; FMACALL32_BDVER2-NEXT:    vinsertps $48, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x10,0x30]
460; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0,1,2],mem[0]
461; FMACALL32_BDVER2-NEXT:    addl $108, %esp ## encoding: [0x83,0xc4,0x6c]
462; FMACALL32_BDVER2-NEXT:    retl ## encoding: [0xc3]
463entry:
464  %call = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
465  ret <4 x float> %call
466}
467
468define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) #0 {
469; FMA32-LABEL: test_v8f32:
470; FMA32:       ## %bb.0: ## %entry
471; FMA32-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
472; FMA32-NEXT:    ## ymm0 = (ymm1 * ymm0) + ymm2
473; FMA32-NEXT:    retl ## encoding: [0xc3]
474;
475; FMA64-LABEL: test_v8f32:
476; FMA64:       ## %bb.0: ## %entry
477; FMA64-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
478; FMA64-NEXT:    ## ymm0 = (ymm1 * ymm0) + ymm2
479; FMA64-NEXT:    retq ## encoding: [0xc3]
480;
481; FMACALL64-LABEL: test_v8f32:
482; FMACALL64:       ## %bb.0: ## %entry
483; FMACALL64-NEXT:    subq $136, %rsp ## encoding: [0x48,0x81,0xec,0x88,0x00,0x00,0x00]
484; FMACALL64-NEXT:    movaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
485; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x6c,0x24,0x50]
486; FMACALL64-NEXT:    movaps %xmm4, (%rsp) ## 16-byte Spill
487; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x24,0x24]
488; FMACALL64-NEXT:    movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
489; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x5c,0x24,0x40]
490; FMACALL64-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
491; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x54,0x24,0x60]
492; FMACALL64-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
493; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x4c,0x24,0x30]
494; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
495; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x10]
496; FMACALL64-NEXT:    shufps $255, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0xff]
497; FMACALL64-NEXT:    ## xmm0 = xmm0[3,3,3,3]
498; FMACALL64-NEXT:    movaps %xmm2, %xmm1 ## encoding: [0x0f,0x28,0xca]
499; FMACALL64-NEXT:    shufps $255, %xmm2, %xmm1 ## encoding: [0x0f,0xc6,0xca,0xff]
500; FMACALL64-NEXT:    ## xmm1 = xmm1[3,3],xmm2[3,3]
501; FMACALL64-NEXT:    movaps %xmm4, %xmm2 ## encoding: [0x0f,0x28,0xd4]
502; FMACALL64-NEXT:    shufps $255, %xmm4, %xmm2 ## encoding: [0x0f,0xc6,0xd4,0xff]
503; FMACALL64-NEXT:    ## xmm2 = xmm2[3,3],xmm4[3,3]
504; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
505; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
506; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
507; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x20]
508; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
509; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x10]
510; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
511; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
512; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
513; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x60]
514; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
515; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
516; FMACALL64-NEXT:    movaps (%rsp), %xmm2 ## 16-byte Reload
517; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x14,0x24]
518; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
519; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
520; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
521; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
522; FMACALL64-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
523; FMACALL64-NEXT:    ## encoding: [0x0f,0x14,0x44,0x24,0x20]
524; FMACALL64-NEXT:    ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
525; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
526; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x70]
527; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
528; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x10]
529; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
530; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x60]
531; FMACALL64-NEXT:    movaps (%rsp), %xmm2 ## 16-byte Reload
532; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x14,0x24]
533; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
534; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
535; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
536; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x20]
537; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
538; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x10]
539; FMACALL64-NEXT:    shufps $85, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0x55]
540; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1,1,1]
541; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
542; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x60]
543; FMACALL64-NEXT:    shufps $85, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0x55]
544; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1,1,1]
545; FMACALL64-NEXT:    movaps (%rsp), %xmm2 ## 16-byte Reload
546; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x14,0x24]
547; FMACALL64-NEXT:    shufps $85, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0x55]
548; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1,1,1]
549; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
550; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
551; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
552; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x20]
553; FMACALL64-NEXT:    unpcklps %xmm0, %xmm1 ## encoding: [0x0f,0x14,0xc8]
554; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
555; FMACALL64-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload
556; FMACALL64-NEXT:    ## encoding: [0x66,0x0f,0x14,0x4c,0x24,0x70]
557; FMACALL64-NEXT:    ## xmm1 = xmm1[0],mem[0]
558; FMACALL64-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
559; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x4c,0x24,0x20]
560; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
561; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x30]
562; FMACALL64-NEXT:    shufps $255, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0xff]
563; FMACALL64-NEXT:    ## xmm0 = xmm0[3,3,3,3]
564; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
565; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x40]
566; FMACALL64-NEXT:    shufps $255, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0xff]
567; FMACALL64-NEXT:    ## xmm1 = xmm1[3,3,3,3]
568; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
569; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x54,0x24,0x50]
570; FMACALL64-NEXT:    shufps $255, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0xff]
571; FMACALL64-NEXT:    ## xmm2 = xmm2[3,3,3,3]
572; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
573; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
574; FMACALL64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
575; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x04,0x24]
576; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
577; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x30]
578; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
579; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
580; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
581; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x40]
582; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
583; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
584; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
585; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x54,0x24,0x50]
586; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
587; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
588; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
589; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
590; FMACALL64-NEXT:    unpcklps (%rsp), %xmm0 ## 16-byte Folded Reload
591; FMACALL64-NEXT:    ## encoding: [0x0f,0x14,0x04,0x24]
592; FMACALL64-NEXT:    ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
593; FMACALL64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
594; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x04,0x24]
595; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
596; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x30]
597; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
598; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x40]
599; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
600; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x54,0x24,0x50]
601; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
602; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
603; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
604; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x10]
605; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
606; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x30]
607; FMACALL64-NEXT:    shufps $85, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0x55]
608; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1,1,1]
609; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
610; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x40]
611; FMACALL64-NEXT:    shufps $85, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0x55]
612; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1,1,1]
613; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
614; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x54,0x24,0x50]
615; FMACALL64-NEXT:    shufps $85, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0x55]
616; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1,1,1]
617; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
618; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
619; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
620; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
621; FMACALL64-NEXT:    unpcklps %xmm0, %xmm1 ## encoding: [0x0f,0x14,0xc8]
622; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
623; FMACALL64-NEXT:    unpcklpd (%rsp), %xmm1 ## 16-byte Folded Reload
624; FMACALL64-NEXT:    ## encoding: [0x66,0x0f,0x14,0x0c,0x24]
625; FMACALL64-NEXT:    ## xmm1 = xmm1[0],mem[0]
626; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
627; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x20]
628; FMACALL64-NEXT:    addq $136, %rsp ## encoding: [0x48,0x81,0xc4,0x88,0x00,0x00,0x00]
629; FMACALL64-NEXT:    retq ## encoding: [0xc3]
630;
631; AVX512-LABEL: test_v8f32:
632; AVX512:       ## %bb.0: ## %entry
633; AVX512-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
634; AVX512-NEXT:    ## ymm0 = (ymm1 * ymm0) + ymm2
635; AVX512-NEXT:    retq ## encoding: [0xc3]
636;
637; AVX512VL-LABEL: test_v8f32:
638; AVX512VL:       ## %bb.0: ## %entry
639; AVX512VL-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
640; AVX512VL-NEXT:    ## ymm0 = (ymm1 * ymm0) + ymm2
641; AVX512VL-NEXT:    retq ## encoding: [0xc3]
642;
643; FMACALL32_BDVER2-LABEL: test_v8f32:
644; FMACALL32_BDVER2:       ## %bb.0: ## %entry
645; FMACALL32_BDVER2-NEXT:    subl $284, %esp ## encoding: [0x81,0xec,0x1c,0x01,0x00,0x00]
646; FMACALL32_BDVER2-NEXT:    ## imm = 0x11C
647; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm2, %xmm3 ## encoding: [0xc4,0xe3,0x7d,0x19,0xd3,0x01]
648; FMACALL32_BDVER2-NEXT:    vmovups %ymm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
649; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x11,0x94,0x24,0xf0,0x00,0x00,0x00]
650; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm1, %xmm2 ## encoding: [0xc4,0xe3,0x7d,0x19,0xca,0x01]
651; FMACALL32_BDVER2-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
652; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x11,0x8c,0x24,0xd0,0x00,0x00,0x00]
653; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm0, %xmm1 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01]
654; FMACALL32_BDVER2-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
655; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x11,0x84,0x24,0xb0,0x00,0x00,0x00]
656; FMACALL32_BDVER2-NEXT:    vmovaps %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
657; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x5c,0x24,0x50]
658; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm3, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x5c,0x24,0x08,0x02]
659; FMACALL32_BDVER2-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
660; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x54,0x24,0x40]
661; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x02]
662; FMACALL32_BDVER2-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
663; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x4c,0x24,0x30]
664; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x02]
665; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
666; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
667; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
668; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
669; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0xa4,0x00,0x00,0x00]
670; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
671; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x50]
672; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload
673; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x54,0x24,0x40]
674; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
675; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x4c,0x24,0x30]
676; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01]
677; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x01]
678; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x01]
679; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
680; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
681; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
682; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x98,0x00,0x00,0x00]
683; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
684; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x50]
685; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08]
686; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
687; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40]
688; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04]
689; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
690; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x30]
691; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24]
692; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
693; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
694; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
695; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x8c,0x00,0x00,0x00]
696; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
697; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xf0,0x00,0x00,0x00]
698; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03]
699; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
700; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xd0,0x00,0x00,0x00]
701; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x03]
702; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
703; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xb0,0x00,0x00,0x00]
704; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x03]
705; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
706; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
707; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
708; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
709; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x80,0x00,0x00,0x00]
710; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
711; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xf0,0x00,0x00,0x00]
712; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x02]
713; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
714; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xd0,0x00,0x00,0x00]
715; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x02]
716; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
717; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xb0,0x00,0x00,0x00]
718; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x02]
719; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
720; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
721; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
722; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
723; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x7c,0x24,0x74]
724; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
725; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xf0,0x00,0x00,0x00]
726; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01]
727; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
728; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xd0,0x00,0x00,0x00]
729; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x01]
730; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
731; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xb0,0x00,0x00,0x00]
732; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x01]
733; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
734; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
735; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
736; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
737; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x7c,0x24,0x68]
738; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
739; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xf0,0x00,0x00,0x00]
740; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08]
741; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
742; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xd0,0x00,0x00,0x00]
743; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04]
744; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
745; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xb0,0x00,0x00,0x00]
746; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24]
747; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
748; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
749; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
750; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
751; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x50]
752; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload
753; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x54,0x24,0x40]
754; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
755; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x4c,0x24,0x30]
756; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03]
757; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x03]
758; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x03]
759; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x2c]
760; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
761; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x6c,0x24,0x68]
762; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x28]
763; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
764; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x6c,0x24,0x74]
765; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x24]
766; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
767; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x80,0x00,0x00,0x00]
768; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x20]
769; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
770; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x8c,0x00,0x00,0x00]
771; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x1c]
772; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
773; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x98,0x00,0x00,0x00]
774; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x18]
775; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
776; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0xa4,0x00,0x00,0x00]
777; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x14]
778; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
779; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
780; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x10]
781; FMACALL32_BDVER2-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
782; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x2c]
783; FMACALL32_BDVER2-NEXT:    vinsertps $16, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x28,0x10]
784; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0],mem[0],xmm0[2,3]
785; FMACALL32_BDVER2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
786; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x1c]
787; FMACALL32_BDVER2-NEXT:    vinsertps $16, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x18,0x10]
788; FMACALL32_BDVER2-NEXT:    ## xmm1 = xmm1[0],mem[0],xmm1[2,3]
789; FMACALL32_BDVER2-NEXT:    vinsertps $32, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x24,0x20]
790; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0,1],mem[0],xmm0[3]
791; FMACALL32_BDVER2-NEXT:    vinsertps $32, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x14,0x20]
792; FMACALL32_BDVER2-NEXT:    ## xmm1 = xmm1[0,1],mem[0],xmm1[3]
793; FMACALL32_BDVER2-NEXT:    vinsertps $48, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x20,0x30]
794; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0,1,2],mem[0]
795; FMACALL32_BDVER2-NEXT:    vinsertps $48, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x10,0x30]
796; FMACALL32_BDVER2-NEXT:    ## xmm1 = xmm1[0,1,2],mem[0]
797; FMACALL32_BDVER2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
798; FMACALL32_BDVER2-NEXT:    addl $284, %esp ## encoding: [0x81,0xc4,0x1c,0x01,0x00,0x00]
799; FMACALL32_BDVER2-NEXT:    ## imm = 0x11C
800; FMACALL32_BDVER2-NEXT:    retl ## encoding: [0xc3]
801entry:
802  %call = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c)
803  ret <8 x float> %call
804}
805
806define <16 x float> @test_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c) #0 {
807; FMA32-LABEL: test_v16f32:
808; FMA32:       ## %bb.0: ## %entry
809; FMA32-NEXT:    pushl %ebp ## encoding: [0x55]
810; FMA32-NEXT:    movl %esp, %ebp ## encoding: [0x89,0xe5]
811; FMA32-NEXT:    andl $-32, %esp ## encoding: [0x83,0xe4,0xe0]
812; FMA32-NEXT:    subl $32, %esp ## encoding: [0x83,0xec,0x20]
813; FMA32-NEXT:    vfmadd213ps 8(%ebp), %ymm2, %ymm0 ## encoding: [0xc4,0xe2,0x6d,0xa8,0x45,0x08]
814; FMA32-NEXT:    ## ymm0 = (ymm2 * ymm0) + mem
815; FMA32-NEXT:    vfmadd213ps 40(%ebp), %ymm3, %ymm1 ## encoding: [0xc4,0xe2,0x65,0xa8,0x4d,0x28]
816; FMA32-NEXT:    ## ymm1 = (ymm3 * ymm1) + mem
817; FMA32-NEXT:    movl %ebp, %esp ## encoding: [0x89,0xec]
818; FMA32-NEXT:    popl %ebp ## encoding: [0x5d]
819; FMA32-NEXT:    retl ## encoding: [0xc3]
820;
821; FMA64-LABEL: test_v16f32:
822; FMA64:       ## %bb.0: ## %entry
823; FMA64-NEXT:    vfmadd213ps %ymm4, %ymm2, %ymm0 ## encoding: [0xc4,0xe2,0x6d,0xa8,0xc4]
824; FMA64-NEXT:    ## ymm0 = (ymm2 * ymm0) + ymm4
825; FMA64-NEXT:    vfmadd213ps %ymm5, %ymm3, %ymm1 ## encoding: [0xc4,0xe2,0x65,0xa8,0xcd]
826; FMA64-NEXT:    ## ymm1 = (ymm3 * ymm1) + ymm5
827; FMA64-NEXT:    retq ## encoding: [0xc3]
828;
829; FMACALL64-LABEL: test_v16f32:
830; FMACALL64:       ## %bb.0: ## %entry
831; FMACALL64-NEXT:    subq $168, %rsp ## encoding: [0x48,0x81,0xec,0xa8,0x00,0x00,0x00]
832; FMACALL64-NEXT:    movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
833; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0xbc,0x24,0x80,0x00,0x00,0x00]
834; FMACALL64-NEXT:    movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
835; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x74,0x24,0x20]
836; FMACALL64-NEXT:    movaps %xmm5, (%rsp) ## 16-byte Spill
837; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x2c,0x24]
838; FMACALL64-NEXT:    movaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
839; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x64,0x24,0x10]
840; FMACALL64-NEXT:    movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
841; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x5c,0x24,0x70]
842; FMACALL64-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
843; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x54,0x24,0x40]
844; FMACALL64-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
845; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x4c,0x24,0x30]
846; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
847; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x50]
848; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xb0,0x00,0x00,0x00]
849; FMACALL64-NEXT:    shufps $255, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0xff]
850; FMACALL64-NEXT:    ## xmm0 = xmm0[3,3,3,3]
851; FMACALL64-NEXT:    movaps %xmm4, %xmm1 ## encoding: [0x0f,0x28,0xcc]
852; FMACALL64-NEXT:    shufps $255, %xmm4, %xmm1 ## encoding: [0x0f,0xc6,0xcc,0xff]
853; FMACALL64-NEXT:    ## xmm1 = xmm1[3,3],xmm4[3,3]
854; FMACALL64-NEXT:    shufps $255, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0xff]
855; FMACALL64-NEXT:    ## xmm2 = xmm2[3,3,3,3]
856; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
857; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
858; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
859; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x60]
860; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
861; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x50]
862; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
863; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
864; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
865; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
866; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
867; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
868; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xb0,0x00,0x00,0x00]
869; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
870; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
871; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
872; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
873; FMACALL64-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
874; FMACALL64-NEXT:    ## encoding: [0x0f,0x14,0x44,0x24,0x60]
875; FMACALL64-NEXT:    ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
876; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
877; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x84,0x24,0x90,0x00,0x00,0x00]
878; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
879; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x50]
880; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
881; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
882; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xb0,0x00,0x00,0x00]
883; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
884; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
885; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
886; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x60]
887; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
888; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x50]
889; FMACALL64-NEXT:    shufps $85, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0x55]
890; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1,1,1]
891; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
892; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
893; FMACALL64-NEXT:    shufps $85, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0x55]
894; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1,1,1]
895; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xb0,0x00,0x00,0x00]
896; FMACALL64-NEXT:    shufps $85, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0x55]
897; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1,1,1]
898; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
899; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
900; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
901; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x60]
902; FMACALL64-NEXT:    unpcklps %xmm0, %xmm1 ## encoding: [0x0f,0x14,0xc8]
903; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
904; FMACALL64-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload
905; FMACALL64-NEXT:    ## encoding: [0x66,0x0f,0x14,0x8c,0x24,0x90,0x00,0x00,0x00]
906; FMACALL64-NEXT:    ## xmm1 = xmm1[0],mem[0]
907; FMACALL64-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
908; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x4c,0x24,0x60]
909; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
910; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x30]
911; FMACALL64-NEXT:    shufps $255, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0xff]
912; FMACALL64-NEXT:    ## xmm0 = xmm0[3,3,3,3]
913; FMACALL64-NEXT:    movaps (%rsp), %xmm1 ## 16-byte Reload
914; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x0c,0x24]
915; FMACALL64-NEXT:    shufps $255, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0xff]
916; FMACALL64-NEXT:    ## xmm1 = xmm1[3,3,3,3]
917; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xc0,0x00,0x00,0x00]
918; FMACALL64-NEXT:    shufps $255, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0xff]
919; FMACALL64-NEXT:    ## xmm2 = xmm2[3,3,3,3]
920; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
921; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
922; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
923; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x10]
924; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
925; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x30]
926; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
927; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
928; FMACALL64-NEXT:    movaps (%rsp), %xmm1 ## 16-byte Reload
929; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x0c,0x24]
930; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
931; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
932; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xc0,0x00,0x00,0x00]
933; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
934; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
935; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
936; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
937; FMACALL64-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
938; FMACALL64-NEXT:    ## encoding: [0x0f,0x14,0x44,0x24,0x10]
939; FMACALL64-NEXT:    ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
940; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
941; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x50]
942; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
943; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x30]
944; FMACALL64-NEXT:    movaps (%rsp), %xmm1 ## 16-byte Reload
945; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x0c,0x24]
946; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xc0,0x00,0x00,0x00]
947; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
948; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
949; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
950; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x10]
951; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
952; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x30]
953; FMACALL64-NEXT:    shufps $85, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0x55]
954; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1,1,1]
955; FMACALL64-NEXT:    movaps (%rsp), %xmm1 ## 16-byte Reload
956; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x0c,0x24]
957; FMACALL64-NEXT:    shufps $85, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0x55]
958; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1,1,1]
959; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xc0,0x00,0x00,0x00]
960; FMACALL64-NEXT:    shufps $85, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0x55]
961; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1,1,1]
962; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
963; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
964; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
965; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
966; FMACALL64-NEXT:    unpcklps %xmm0, %xmm1 ## encoding: [0x0f,0x14,0xc8]
967; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
968; FMACALL64-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload
969; FMACALL64-NEXT:    ## encoding: [0x66,0x0f,0x14,0x4c,0x24,0x50]
970; FMACALL64-NEXT:    ## xmm1 = xmm1[0],mem[0]
971; FMACALL64-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
972; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x4c,0x24,0x10]
973; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
974; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x40]
975; FMACALL64-NEXT:    shufps $255, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0xff]
976; FMACALL64-NEXT:    ## xmm0 = xmm0[3,3,3,3]
977; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
978; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x20]
979; FMACALL64-NEXT:    shufps $255, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0xff]
980; FMACALL64-NEXT:    ## xmm1 = xmm1[3,3,3,3]
981; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xd0,0x00,0x00,0x00]
982; FMACALL64-NEXT:    shufps $255, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0xff]
983; FMACALL64-NEXT:    ## xmm2 = xmm2[3,3,3,3]
984; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
985; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
986; FMACALL64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
987; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x04,0x24]
988; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
989; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x40]
990; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
991; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
992; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
993; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x20]
994; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
995; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
996; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xd0,0x00,0x00,0x00]
997; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
998; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
999; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
1000; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
1001; FMACALL64-NEXT:    unpcklps (%rsp), %xmm0 ## 16-byte Folded Reload
1002; FMACALL64-NEXT:    ## encoding: [0x0f,0x14,0x04,0x24]
1003; FMACALL64-NEXT:    ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
1004; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1005; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x30]
1006; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1007; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x40]
1008; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1009; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x20]
1010; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xd0,0x00,0x00,0x00]
1011; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
1012; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
1013; FMACALL64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
1014; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x04,0x24]
1015; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1016; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x40]
1017; FMACALL64-NEXT:    shufps $85, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0x55]
1018; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1,1,1]
1019; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1020; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x20]
1021; FMACALL64-NEXT:    shufps $85, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0x55]
1022; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1,1,1]
1023; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xd0,0x00,0x00,0x00]
1024; FMACALL64-NEXT:    shufps $85, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0x55]
1025; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1,1,1]
1026; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
1027; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
1028; FMACALL64-NEXT:    movaps (%rsp), %xmm1 ## 16-byte Reload
1029; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x0c,0x24]
1030; FMACALL64-NEXT:    unpcklps %xmm0, %xmm1 ## encoding: [0x0f,0x14,0xc8]
1031; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1032; FMACALL64-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload
1033; FMACALL64-NEXT:    ## encoding: [0x66,0x0f,0x14,0x4c,0x24,0x30]
1034; FMACALL64-NEXT:    ## xmm1 = xmm1[0],mem[0]
1035; FMACALL64-NEXT:    movaps %xmm1, (%rsp) ## 16-byte Spill
1036; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x0c,0x24]
1037; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1038; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x70]
1039; FMACALL64-NEXT:    shufps $255, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0xff]
1040; FMACALL64-NEXT:    ## xmm0 = xmm0[3,3,3,3]
1041; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1042; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x8c,0x24,0x80,0x00,0x00,0x00]
1043; FMACALL64-NEXT:    shufps $255, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0xff]
1044; FMACALL64-NEXT:    ## xmm1 = xmm1[3,3,3,3]
1045; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xe0,0x00,0x00,0x00]
1046; FMACALL64-NEXT:    shufps $255, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0xff]
1047; FMACALL64-NEXT:    ## xmm2 = xmm2[3,3,3,3]
1048; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
1049; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
1050; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1051; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x20]
1052; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1053; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x70]
1054; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
1055; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
1056; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1057; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x8c,0x24,0x80,0x00,0x00,0x00]
1058; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
1059; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
1060; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xe0,0x00,0x00,0x00]
1061; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
1062; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
1063; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
1064; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
1065; FMACALL64-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
1066; FMACALL64-NEXT:    ## encoding: [0x0f,0x14,0x44,0x24,0x20]
1067; FMACALL64-NEXT:    ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
1068; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1069; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x20]
1070; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1071; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x70]
1072; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1073; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x8c,0x24,0x80,0x00,0x00,0x00]
1074; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xe0,0x00,0x00,0x00]
1075; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
1076; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
1077; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1078; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x40]
1079; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1080; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x70]
1081; FMACALL64-NEXT:    shufps $85, %xmm0, %xmm0 ## encoding: [0x0f,0xc6,0xc0,0x55]
1082; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1,1,1]
1083; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1084; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x8c,0x24,0x80,0x00,0x00,0x00]
1085; FMACALL64-NEXT:    shufps $85, %xmm1, %xmm1 ## encoding: [0x0f,0xc6,0xc9,0x55]
1086; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1,1,1]
1087; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xe0,0x00,0x00,0x00]
1088; FMACALL64-NEXT:    shufps $85, %xmm2, %xmm2 ## encoding: [0x0f,0xc6,0xd2,0x55]
1089; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1,1,1]
1090; FMACALL64-NEXT:    callq _fmaf ## encoding: [0xe8,A,A,A,A]
1091; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: reloc_branch_4byte_pcrel
1092; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 ## 16-byte Reload
1093; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x5c,0x24,0x40]
1094; FMACALL64-NEXT:    unpcklps %xmm0, %xmm3 ## encoding: [0x0f,0x14,0xd8]
1095; FMACALL64-NEXT:    ## xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
1096; FMACALL64-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 ## 16-byte Folded Reload
1097; FMACALL64-NEXT:    ## encoding: [0x66,0x0f,0x14,0x5c,0x24,0x20]
1098; FMACALL64-NEXT:    ## xmm3 = xmm3[0],mem[0]
1099; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1100; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x60]
1101; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1102; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
1103; FMACALL64-NEXT:    movaps (%rsp), %xmm2 ## 16-byte Reload
1104; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x14,0x24]
1105; FMACALL64-NEXT:    addq $168, %rsp ## encoding: [0x48,0x81,0xc4,0xa8,0x00,0x00,0x00]
1106; FMACALL64-NEXT:    retq ## encoding: [0xc3]
1107;
1108; AVX512-LABEL: test_v16f32:
1109; AVX512:       ## %bb.0: ## %entry
1110; AVX512-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa8,0xc2]
1111; AVX512-NEXT:    ## zmm0 = (zmm1 * zmm0) + zmm2
1112; AVX512-NEXT:    retq ## encoding: [0xc3]
1113;
1114; AVX512VL-LABEL: test_v16f32:
1115; AVX512VL:       ## %bb.0: ## %entry
1116; AVX512VL-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa8,0xc2]
1117; AVX512VL-NEXT:    ## zmm0 = (zmm1 * zmm0) + zmm2
1118; AVX512VL-NEXT:    retq ## encoding: [0xc3]
1119;
1120; FMACALL32_BDVER2-LABEL: test_v16f32:
1121; FMACALL32_BDVER2:       ## %bb.0: ## %entry
1122; FMACALL32_BDVER2-NEXT:    pushl %ebp ## encoding: [0x55]
1123; FMACALL32_BDVER2-NEXT:    movl %esp, %ebp ## encoding: [0x89,0xe5]
1124; FMACALL32_BDVER2-NEXT:    andl $-32, %esp ## encoding: [0x83,0xe4,0xe0]
1125; FMACALL32_BDVER2-NEXT:    subl $448, %esp ## encoding: [0x81,0xec,0xc0,0x01,0x00,0x00]
1126; FMACALL32_BDVER2-NEXT:    ## imm = 0x1C0
1127; FMACALL32_BDVER2-NEXT:    vmovaps 56(%ebp), %xmm4 ## encoding: [0xc5,0xf8,0x28,0x65,0x38]
1128; FMACALL32_BDVER2-NEXT:    vmovaps %ymm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
1129; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x29,0x94,0x24,0x60,0x01,0x00,0x00]
1130; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm3, %xmm2 ## encoding: [0xc4,0xe3,0x7d,0x19,0xda,0x01]
1131; FMACALL32_BDVER2-NEXT:    vmovaps %ymm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
1132; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x29,0x4c,0x24,0x60]
1133; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm1, %xmm1 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc9,0x01]
1134; FMACALL32_BDVER2-NEXT:    vmovaps %ymm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
1135; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x29,0x9c,0x24,0x80,0x00,0x00,0x00]
1136; FMACALL32_BDVER2-NEXT:    vmovaps %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
1137; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x29,0x84,0x24,0x80,0x01,0x00,0x00]
1138; FMACALL32_BDVER2-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1139; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x94,0x24,0xc0,0x00,0x00,0x00]
1140; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x02]
1141; FMACALL32_BDVER2-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1142; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x8c,0x24,0xb0,0x00,0x00,0x00]
1143; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x02]
1144; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm4, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x64,0x24,0x08,0x02]
1145; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1146; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1147; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1148; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1149; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x54,0x01,0x00,0x00]
1150; FMACALL32_BDVER2-NEXT:    vmovaps 56(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x38]
1151; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload
1152; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x94,0x24,0xc0,0x00,0x00,0x00]
1153; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
1154; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x8c,0x24,0xb0,0x00,0x00,0x00]
1155; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01]
1156; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x01]
1157; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x01]
1158; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1159; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1160; FMACALL32_BDVER2-NEXT:    vmovaps 56(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x38]
1161; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1162; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x48,0x01,0x00,0x00]
1163; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08]
1164; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
1165; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0xc0,0x00,0x00,0x00]
1166; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04]
1167; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
1168; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0xb0,0x00,0x00,0x00]
1169; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24]
1170; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1171; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1172; FMACALL32_BDVER2-NEXT:    vmovaps 40(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x28]
1173; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1174; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x3c,0x01,0x00,0x00]
1175; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03]
1176; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1177; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x00,0x00,0x00]
1178; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x03]
1179; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1180; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x44,0x24,0x60]
1181; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x03]
1182; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1183; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1184; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1185; FMACALL32_BDVER2-NEXT:    vmovaps 40(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x28]
1186; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1187; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x30,0x01,0x00,0x00]
1188; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x02]
1189; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1190; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x00,0x00,0x00]
1191; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x02]
1192; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1193; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x44,0x24,0x60]
1194; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x02]
1195; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1196; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1197; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1198; FMACALL32_BDVER2-NEXT:    vmovaps 40(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x28]
1199; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1200; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x24,0x01,0x00,0x00]
1201; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01]
1202; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1203; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x00,0x00,0x00]
1204; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x01]
1205; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1206; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x44,0x24,0x60]
1207; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x01]
1208; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1209; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1210; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1211; FMACALL32_BDVER2-NEXT:    vmovaps 40(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x28]
1212; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1213; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x18,0x01,0x00,0x00]
1214; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08]
1215; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1216; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x00,0x00,0x00]
1217; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04]
1218; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1219; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x44,0x24,0x60]
1220; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24]
1221; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1222; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1223; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1224; FMACALL32_BDVER2-NEXT:    vmovaps 24(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x18]
1225; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1226; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x0c,0x01,0x00,0x00]
1227; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03]
1228; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1229; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00]
1230; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
1231; FMACALL32_BDVER2-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1232; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x84,0x24,0x80,0x00,0x00,0x00]
1233; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x03]
1234; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1235; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00]
1236; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
1237; FMACALL32_BDVER2-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1238; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x60]
1239; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x03]
1240; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1241; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1242; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1243; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1244; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x00,0x01,0x00,0x00]
1245; FMACALL32_BDVER2-NEXT:    vmovaps 24(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x18]
1246; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload
1247; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x94,0x24,0x80,0x00,0x00,0x00]
1248; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
1249; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x4c,0x24,0x60]
1250; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x02]
1251; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x02]
1252; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x02]
1253; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1254; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1255; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1256; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0xf4,0x00,0x00,0x00]
1257; FMACALL32_BDVER2-NEXT:    vmovaps 24(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x18]
1258; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload
1259; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x94,0x24,0x80,0x00,0x00,0x00]
1260; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
1261; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x4c,0x24,0x60]
1262; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01]
1263; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x01]
1264; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x01]
1265; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1266; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1267; FMACALL32_BDVER2-NEXT:    vmovaps 24(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x18]
1268; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1269; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0xe8,0x00,0x00,0x00]
1270; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08]
1271; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
1272; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0x80,0x00,0x00,0x00]
1273; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04]
1274; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
1275; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x60]
1276; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24]
1277; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1278; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1279; FMACALL32_BDVER2-NEXT:    vmovaps 8(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x08]
1280; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1281; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x80,0x00,0x00,0x00]
1282; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03]
1283; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1284; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00]
1285; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x03]
1286; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1287; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00]
1288; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x03]
1289; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1290; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1291; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1292; FMACALL32_BDVER2-NEXT:    vmovaps 8(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x08]
1293; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1294; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x7c,0x24,0x60]
1295; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x02]
1296; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1297; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00]
1298; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x02]
1299; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1300; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00]
1301; FMACALL32_BDVER2-NEXT:    vextractps $2, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x02]
1302; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1303; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1304; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1305; FMACALL32_BDVER2-NEXT:    vmovaps 8(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x08]
1306; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1307; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0xdc,0x00,0x00,0x00]
1308; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01]
1309; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1310; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00]
1311; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x01]
1312; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1313; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00]
1314; FMACALL32_BDVER2-NEXT:    vextractps $1, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x01]
1315; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1316; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1317; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1318; FMACALL32_BDVER2-NEXT:    vmovaps 8(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x08]
1319; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1320; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0xd0,0x00,0x00,0x00]
1321; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08]
1322; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1323; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00]
1324; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04]
1325; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1326; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00]
1327; FMACALL32_BDVER2-NEXT:    vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24]
1328; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1329; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1330; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1331; FMACALL32_BDVER2-NEXT:    vmovaps 56(%ebp), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x45,0x38]
1332; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload
1333; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x94,0x24,0xc0,0x00,0x00,0x00]
1334; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
1335; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x8c,0x24,0xb0,0x00,0x00,0x00]
1336; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03]
1337; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x03]
1338; FMACALL32_BDVER2-NEXT:    vextractps $3, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x03]
1339; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x3c]
1340; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1341; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0xd0,0x00,0x00,0x00]
1342; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x38]
1343; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1344; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0xdc,0x00,0x00,0x00]
1345; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x34]
1346; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1347; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x6c,0x24,0x60]
1348; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x30]
1349; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1350; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x80,0x00,0x00,0x00]
1351; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x2c]
1352; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1353; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0xe8,0x00,0x00,0x00]
1354; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x28]
1355; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1356; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0xf4,0x00,0x00,0x00]
1357; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x24]
1358; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1359; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x00,0x01,0x00,0x00]
1360; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x20]
1361; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1362; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x0c,0x01,0x00,0x00]
1363; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x5c]
1364; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1365; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x18,0x01,0x00,0x00]
1366; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x58]
1367; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1368; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x24,0x01,0x00,0x00]
1369; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x54]
1370; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1371; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x30,0x01,0x00,0x00]
1372; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x50]
1373; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1374; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x3c,0x01,0x00,0x00]
1375; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x4c]
1376; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1377; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x48,0x01,0x00,0x00]
1378; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x48]
1379; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1380; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x54,0x01,0x00,0x00]
1381; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x44]
1382; FMACALL32_BDVER2-NEXT:    calll _fmaf ## encoding: [0xe8,A,A,A,A]
1383; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4
1384; FMACALL32_BDVER2-NEXT:    fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x40]
1385; FMACALL32_BDVER2-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1386; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x3c]
1387; FMACALL32_BDVER2-NEXT:    vinsertps $16, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x38,0x10]
1388; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0],mem[0],xmm0[2,3]
1389; FMACALL32_BDVER2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1390; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x2c]
1391; FMACALL32_BDVER2-NEXT:    vinsertps $16, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x28,0x10]
1392; FMACALL32_BDVER2-NEXT:    ## xmm1 = xmm1[0],mem[0],xmm1[2,3]
1393; FMACALL32_BDVER2-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
1394; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfa,0x10,0x54,0x24,0x4c]
1395; FMACALL32_BDVER2-NEXT:    vinsertps $16, {{[0-9]+}}(%esp), %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x21,0x54,0x24,0x48,0x10]
1396; FMACALL32_BDVER2-NEXT:    ## xmm2 = xmm2[0],mem[0],xmm2[2,3]
1397; FMACALL32_BDVER2-NEXT:    vinsertps $32, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x34,0x20]
1398; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0,1],mem[0],xmm0[3]
1399; FMACALL32_BDVER2-NEXT:    vinsertps $32, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x24,0x20]
1400; FMACALL32_BDVER2-NEXT:    ## xmm1 = xmm1[0,1],mem[0],xmm1[3]
1401; FMACALL32_BDVER2-NEXT:    vinsertps $32, {{[0-9]+}}(%esp), %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x21,0x54,0x24,0x44,0x20]
1402; FMACALL32_BDVER2-NEXT:    ## xmm2 = xmm2[0,1],mem[0],xmm2[3]
1403; FMACALL32_BDVER2-NEXT:    vinsertps $48, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x30,0x30]
1404; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0,1,2],mem[0]
1405; FMACALL32_BDVER2-NEXT:    vinsertps $48, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x20,0x30]
1406; FMACALL32_BDVER2-NEXT:    ## xmm1 = xmm1[0,1,2],mem[0]
1407; FMACALL32_BDVER2-NEXT:    vinsertps $48, {{[0-9]+}}(%esp), %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x21,0x54,0x24,0x40,0x30]
1408; FMACALL32_BDVER2-NEXT:    ## xmm2 = xmm2[0,1,2],mem[0]
1409; FMACALL32_BDVER2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
1410; FMACALL32_BDVER2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1411; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x5c]
1412; FMACALL32_BDVER2-NEXT:    vinsertps $16, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x58,0x10]
1413; FMACALL32_BDVER2-NEXT:    ## xmm1 = xmm1[0],mem[0],xmm1[2,3]
1414; FMACALL32_BDVER2-NEXT:    vinsertps $32, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x54,0x20]
1415; FMACALL32_BDVER2-NEXT:    ## xmm1 = xmm1[0,1],mem[0],xmm1[3]
1416; FMACALL32_BDVER2-NEXT:    vinsertps $48, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x50,0x30]
1417; FMACALL32_BDVER2-NEXT:    ## xmm1 = xmm1[0,1,2],mem[0]
1418; FMACALL32_BDVER2-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1 ## encoding: [0xc4,0xe3,0x75,0x18,0xca,0x01]
1419; FMACALL32_BDVER2-NEXT:    movl %ebp, %esp ## encoding: [0x89,0xec]
1420; FMACALL32_BDVER2-NEXT:    popl %ebp ## encoding: [0x5d]
1421; FMACALL32_BDVER2-NEXT:    retl ## encoding: [0xc3]
1422entry:
1423  %call = call <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c)
1424  ret <16 x float> %call
1425}
1426
; test_v2f64 -- llvm.fma.v2f64 applied to three <2 x double> arguments.
;
; Expected lowering per check prefix, as asserted below:
;   * FMA32, FMA64, AVX512 and AVX512VL runs expect a single xmm vfmadd213pd
;     followed by the return.
;   * The FMACALL64 run expects the three inputs to be spilled, one call to
;     _fma on the low lanes, a movhlps of each input to bring the high lane
;     down for a second call to _fma, and a movlhps to join both results.
;   * The FMACALL32_BDVER2 run expects two calls to _fma with the operands
;     stored to the outgoing stack area, each result stored with fstpl, and
;     the vector rebuilt with vmovsd + vmovhps.
;
; NOTE(review): this function has no assertions under the bare FMACALL32
; prefix, which is shared by two RUN lines (generic i386 and bdver2).
; Presumably their output differs here, so the generator kept only the
; bdver2-specific block -- confirm by regenerating the checks.
1427define <2 x double> @test_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
1428; FMA32-LABEL: test_v2f64:
1429; FMA32:       ## %bb.0:
1430; FMA32-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
1431; FMA32-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
1432; FMA32-NEXT:    retl ## encoding: [0xc3]
1433;
1434; FMA64-LABEL: test_v2f64:
1435; FMA64:       ## %bb.0:
1436; FMA64-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
1437; FMA64-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
1438; FMA64-NEXT:    retq ## encoding: [0xc3]
1439;
1440; FMACALL64-LABEL: test_v2f64:
1441; FMACALL64:       ## %bb.0:
1442; FMACALL64-NEXT:    subq $72, %rsp ## encoding: [0x48,0x83,0xec,0x48]
1443; FMACALL64-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1444; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x54,0x24,0x20]
1445; FMACALL64-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1446; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x4c,0x24,0x10]
1447; FMACALL64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
1448; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x04,0x24]
1449; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1450; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1451; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1452; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x30]
1453; FMACALL64-NEXT:    movaps (%rsp), %xmm0 ## 16-byte Reload
1454; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x04,0x24]
1455; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
1456; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
1457; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1458; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
1459; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
1460; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
1461; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
1462; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x54,0x24,0x20]
1463; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
1464; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
1465; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1466; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1467; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1468; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x30]
1469; FMACALL64-NEXT:    movlhps %xmm0, %xmm1 ## encoding: [0x0f,0x16,0xc8]
1470; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0]
1471; FMACALL64-NEXT:    movaps %xmm1, %xmm0 ## encoding: [0x0f,0x28,0xc1]
1472; FMACALL64-NEXT:    addq $72, %rsp ## encoding: [0x48,0x83,0xc4,0x48]
1473; FMACALL64-NEXT:    retq ## encoding: [0xc3]
1474;
1475; AVX512-LABEL: test_v2f64:
1476; AVX512:       ## %bb.0:
1477; AVX512-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
1478; AVX512-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
1479; AVX512-NEXT:    retq ## encoding: [0xc3]
1480;
1481; AVX512VL-LABEL: test_v2f64:
1482; AVX512VL:       ## %bb.0:
1483; AVX512VL-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
1484; AVX512VL-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
1485; AVX512VL-NEXT:    retq ## encoding: [0xc3]
1486;
1487; FMACALL32_BDVER2-LABEL: test_v2f64:
1488; FMACALL32_BDVER2:       ## %bb.0:
1489; FMACALL32_BDVER2-NEXT:    subl $108, %esp ## encoding: [0x83,0xec,0x6c]
1490; FMACALL32_BDVER2-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1491; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x50]
1492; FMACALL32_BDVER2-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x16,0xc1]
1493; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0],xmm1[0]
1494; FMACALL32_BDVER2-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1495; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x54,0x24,0x30]
1496; FMACALL32_BDVER2-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1497; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x4c,0x24,0x40]
1498; FMACALL32_BDVER2-NEXT:    vmovlps %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x13,0x54,0x24,0x10]
1499; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
1500; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
1501; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
1502; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
1503; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x30]
1504; FMACALL32_BDVER2-NEXT:    vmovhps %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x17,0x44,0x24,0x10]
1505; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
1506; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40]
1507; FMACALL32_BDVER2-NEXT:    vmovlps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload
1508; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x12,0x44,0x24,0x58]
1509; FMACALL32_BDVER2-NEXT:    ## xmm0 = mem[0,1],xmm0[2,3]
1510; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
1511; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x28]
1512; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
1513; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
1514; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x20]
1515; FMACALL32_BDVER2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
1516; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfb,0x10,0x44,0x24,0x28]
1517; FMACALL32_BDVER2-NEXT:    vmovhps {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x16,0x44,0x24,0x20]
1518; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0,1],mem[0,1]
1519; FMACALL32_BDVER2-NEXT:    addl $108, %esp ## encoding: [0x83,0xc4,0x6c]
1520; FMACALL32_BDVER2-NEXT:    retl ## encoding: [0xc3]
; IR under test: one fma intrinsic call with no fast-math flags; its result is
; returned unchanged.
1521  %call = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
1522  ret <2 x double> %call
1523}
1524
; test_v2f64_reassoc -- llvm.fma.v2f64 on <2 x double> where the call carries
; the `reassoc` fast-math flag.
;
; Expected lowering per check prefix, as asserted below:
;   * FMA32, FMA64, AVX512 and AVX512VL runs expect a single xmm vfmadd213pd.
;   * The FMACALL32 run expects vmulpd followed by vaddpd, and the FMACALL64
;     run expects mulpd followed by addpd; neither block contains a call to
;     _fma.
;
; NOTE(review): the separate multiply + add in the runs without FMA is
; presumably what the `reassoc` flag permits -- confirm against the X86
; lowering code before relying on this.
1525define <2 x double> @test_v2f64_reassoc(<2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
1526; FMA32-LABEL: test_v2f64_reassoc:
1527; FMA32:       ## %bb.0:
1528; FMA32-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
1529; FMA32-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
1530; FMA32-NEXT:    retl ## encoding: [0xc3]
1531;
1532; FMACALL32-LABEL: test_v2f64_reassoc:
1533; FMACALL32:       ## %bb.0:
1534; FMACALL32-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x59,0xc1]
1535; FMACALL32-NEXT:    vaddpd %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x58,0xc2]
1536; FMACALL32-NEXT:    retl ## encoding: [0xc3]
1537;
1538; FMA64-LABEL: test_v2f64_reassoc:
1539; FMA64:       ## %bb.0:
1540; FMA64-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
1541; FMA64-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
1542; FMA64-NEXT:    retq ## encoding: [0xc3]
1543;
1544; FMACALL64-LABEL: test_v2f64_reassoc:
1545; FMACALL64:       ## %bb.0:
1546; FMACALL64-NEXT:    mulpd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x59,0xc1]
1547; FMACALL64-NEXT:    addpd %xmm2, %xmm0 ## encoding: [0x66,0x0f,0x58,0xc2]
1548; FMACALL64-NEXT:    retq ## encoding: [0xc3]
1549;
1550; AVX512-LABEL: test_v2f64_reassoc:
1551; AVX512:       ## %bb.0:
1552; AVX512-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
1553; AVX512-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
1554; AVX512-NEXT:    retq ## encoding: [0xc3]
1555;
1556; AVX512VL-LABEL: test_v2f64_reassoc:
1557; AVX512VL:       ## %bb.0:
1558; AVX512VL-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
1559; AVX512VL-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
1560; AVX512VL-NEXT:    retq ## encoding: [0xc3]
; IR under test: the same intrinsic call as test_v2f64, with `reassoc` added
; to the call.
1561  %call = call reassoc <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
1562  ret <2 x double> %call
1563}
1564
; test_v4f64 -- llvm.fma.v4f64 applied to three <4 x double> arguments.
;
; Expected lowering per check prefix, as asserted below:
;   * FMA32, FMA64, AVX512 and AVX512VL runs expect a single ymm vfmadd213pd.
;   * The FMACALL64 run expects the inputs in xmm0-xmm5 to be spilled, four
;     calls to _fma (low and high lane of each 128-bit half, the high lanes
;     obtained with movhlps), and the results paired with movlhps into xmm0
;     and xmm1.
;   * The FMACALL32_BDVER2 run expects the ymm inputs to be spilled and split
;     with vextractf128, four calls to _fma with a vzeroupper ahead of the
;     first three, earlier results parked with fstpt and later reloaded with
;     fldt and stored with fstpl, and the final ymm0 assembled with vmovsd,
;     vmovhps and vinsertf128.
;
; NOTE(review): as with test_v2f64, there are no assertions under the bare
; FMACALL32 prefix in this function; presumably the two RUN lines sharing that
; prefix disagree here -- confirm by regenerating the checks.
1565define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #0 {
1566; FMA32-LABEL: test_v4f64:
1567; FMA32:       ## %bb.0: ## %entry
1568; FMA32-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
1569; FMA32-NEXT:    ## ymm0 = (ymm1 * ymm0) + ymm2
1570; FMA32-NEXT:    retl ## encoding: [0xc3]
1571;
1572; FMA64-LABEL: test_v4f64:
1573; FMA64:       ## %bb.0: ## %entry
1574; FMA64-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
1575; FMA64-NEXT:    ## ymm0 = (ymm1 * ymm0) + ymm2
1576; FMA64-NEXT:    retq ## encoding: [0xc3]
1577;
1578; FMACALL64-LABEL: test_v4f64:
1579; FMACALL64:       ## %bb.0: ## %entry
1580; FMACALL64-NEXT:    subq $120, %rsp ## encoding: [0x48,0x83,0xec,0x78]
1581; FMACALL64-NEXT:    movaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1582; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x6c,0x24,0x40]
1583; FMACALL64-NEXT:    movaps %xmm4, (%rsp) ## 16-byte Spill
1584; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x24,0x24]
1585; FMACALL64-NEXT:    movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1586; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x5c,0x24,0x30]
1587; FMACALL64-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1588; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x54,0x24,0x60]
1589; FMACALL64-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1590; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x4c,0x24,0x20]
1591; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1592; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x50]
1593; FMACALL64-NEXT:    movaps %xmm2, %xmm1 ## encoding: [0x0f,0x28,0xca]
1594; FMACALL64-NEXT:    movaps %xmm4, %xmm2 ## encoding: [0x0f,0x28,0xd4]
1595; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1596; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1597; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1598; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x10]
1599; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1600; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x50]
1601; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
1602; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
1603; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1604; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x60]
1605; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
1606; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
1607; FMACALL64-NEXT:    movaps (%rsp), %xmm2 ## 16-byte Reload
1608; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x14,0x24]
1609; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
1610; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
1611; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1612; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1613; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1614; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
1615; FMACALL64-NEXT:    movlhps %xmm0, %xmm1 ## encoding: [0x0f,0x16,0xc8]
1616; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0]
1617; FMACALL64-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1618; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x4c,0x24,0x10]
1619; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1620; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x20]
1621; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1622; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x30]
1623; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
1624; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x54,0x24,0x40]
1625; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1626; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1627; FMACALL64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
1628; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x04,0x24]
1629; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1630; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x20]
1631; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
1632; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
1633; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1634; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x30]
1635; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
1636; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
1637; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
1638; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x54,0x24,0x40]
1639; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
1640; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
1641; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1642; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1643; FMACALL64-NEXT:    movaps (%rsp), %xmm1 ## 16-byte Reload
1644; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x0c,0x24]
1645; FMACALL64-NEXT:    movlhps %xmm0, %xmm1 ## encoding: [0x0f,0x16,0xc8]
1646; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0]
1647; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1648; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x10]
1649; FMACALL64-NEXT:    addq $120, %rsp ## encoding: [0x48,0x83,0xc4,0x78]
1650; FMACALL64-NEXT:    retq ## encoding: [0xc3]
1651;
1652; AVX512-LABEL: test_v4f64:
1653; AVX512:       ## %bb.0: ## %entry
1654; AVX512-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
1655; AVX512-NEXT:    ## ymm0 = (ymm1 * ymm0) + ymm2
1656; AVX512-NEXT:    retq ## encoding: [0xc3]
1657;
1658; AVX512VL-LABEL: test_v4f64:
1659; AVX512VL:       ## %bb.0: ## %entry
1660; AVX512VL-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
1661; AVX512VL-NEXT:    ## ymm0 = (ymm1 * ymm0) + ymm2
1662; AVX512VL-NEXT:    retq ## encoding: [0xc3]
1663;
1664; FMACALL32_BDVER2-LABEL: test_v4f64:
1665; FMACALL32_BDVER2:       ## %bb.0: ## %entry
1666; FMACALL32_BDVER2-NEXT:    subl $236, %esp ## encoding: [0x81,0xec,0xec,0x00,0x00,0x00]
1667; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm2, %xmm3 ## encoding: [0xc4,0xe3,0x7d,0x19,0xd3,0x01]
1668; FMACALL32_BDVER2-NEXT:    vmovups %ymm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
1669; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x11,0x94,0x24,0xc0,0x00,0x00,0x00]
1670; FMACALL32_BDVER2-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
1671; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x11,0x84,0x24,0x80,0x00,0x00,0x00]
1672; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm1, %xmm2 ## encoding: [0xc4,0xe3,0x7d,0x19,0xca,0x01]
1673; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
1674; FMACALL32_BDVER2-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
1675; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x11,0x8c,0x24,0xa0,0x00,0x00,0x00]
1676; FMACALL32_BDVER2-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1677; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x60]
1678; FMACALL32_BDVER2-NEXT:    vmovlhps %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x16,0xc2]
1679; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0],xmm2[0]
1680; FMACALL32_BDVER2-NEXT:    vmovaps %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1681; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x5c,0x24,0x70]
1682; FMACALL32_BDVER2-NEXT:    vmovlps %xmm3, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x13,0x5c,0x24,0x10]
1683; FMACALL32_BDVER2-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1684; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x54,0x24,0x50]
1685; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
1686; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1687; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
1688; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
1689; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1690; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x7c,0x24,0x44]
1691; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1692; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xc0,0x00,0x00,0x00]
1693; FMACALL32_BDVER2-NEXT:    vmovhps %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x17,0x44,0x24,0x10]
1694; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1695; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0x80,0x00,0x00,0x00]
1696; FMACALL32_BDVER2-NEXT:    vunpckhpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload
1697; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf9,0x15,0x84,0x24,0xa0,0x00,0x00,0x00]
1698; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[1],mem[1]
1699; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
1700; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1701; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
1702; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
1703; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1704; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x7c,0x24,0x38]
1705; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1706; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xc0,0x00,0x00,0x00]
1707; FMACALL32_BDVER2-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x13,0x44,0x24,0x10]
1708; FMACALL32_BDVER2-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1709; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0x80,0x00,0x00,0x00]
1710; FMACALL32_BDVER2-NEXT:    vunpcklpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload
1711; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf9,0x14,0x84,0x24,0xa0,0x00,0x00,0x00]
1712; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0],mem[0]
1713; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
1714; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1715; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
1716; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
1717; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
1718; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x70]
1719; FMACALL32_BDVER2-NEXT:    vmovhps %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x17,0x44,0x24,0x10]
1720; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
1721; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x50]
1722; FMACALL32_BDVER2-NEXT:    vmovlps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload
1723; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x12,0x44,0x24,0x68]
1724; FMACALL32_BDVER2-NEXT:    ## xmm0 = mem[0,1],xmm0[2,3]
1725; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
1726; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x30]
1727; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1728; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x6c,0x24,0x38]
1729; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x28]
1730; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
1731; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x6c,0x24,0x44]
1732; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x20]
1733; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
1734; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
1735; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x18]
1736; FMACALL32_BDVER2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
1737; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfb,0x10,0x44,0x24,0x30]
1738; FMACALL32_BDVER2-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
1739; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x20]
1740; FMACALL32_BDVER2-NEXT:    vmovhps {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x16,0x44,0x24,0x28]
1741; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0,1],mem[0,1]
1742; FMACALL32_BDVER2-NEXT:    vmovhps {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x16,0x4c,0x24,0x18]
1743; FMACALL32_BDVER2-NEXT:    ## xmm1 = xmm1[0,1],mem[0,1]
1744; FMACALL32_BDVER2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
1745; FMACALL32_BDVER2-NEXT:    addl $236, %esp ## encoding: [0x81,0xc4,0xec,0x00,0x00,0x00]
1746; FMACALL32_BDVER2-NEXT:    retl ## encoding: [0xc3]
; IR under test. The block is explicitly labelled `entry`, which is why the
; assertions above match "## %bb.0: ## %entry"; renaming or removing the label
; requires regenerating them.
1747entry:
1748  %call = call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c)
1749  ret <4 x double> %call
1750}
1751
1752define <8 x double> @test_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c) #0 {
1753; FMA32-LABEL: test_v8f64:
1754; FMA32:       ## %bb.0: ## %entry
1755; FMA32-NEXT:    pushl %ebp ## encoding: [0x55]
1756; FMA32-NEXT:    movl %esp, %ebp ## encoding: [0x89,0xe5]
1757; FMA32-NEXT:    andl $-32, %esp ## encoding: [0x83,0xe4,0xe0]
1758; FMA32-NEXT:    subl $32, %esp ## encoding: [0x83,0xec,0x20]
1759; FMA32-NEXT:    vfmadd213pd 8(%ebp), %ymm2, %ymm0 ## encoding: [0xc4,0xe2,0xed,0xa8,0x45,0x08]
1760; FMA32-NEXT:    ## ymm0 = (ymm2 * ymm0) + mem
1761; FMA32-NEXT:    vfmadd213pd 40(%ebp), %ymm3, %ymm1 ## encoding: [0xc4,0xe2,0xe5,0xa8,0x4d,0x28]
1762; FMA32-NEXT:    ## ymm1 = (ymm3 * ymm1) + mem
1763; FMA32-NEXT:    movl %ebp, %esp ## encoding: [0x89,0xec]
1764; FMA32-NEXT:    popl %ebp ## encoding: [0x5d]
1765; FMA32-NEXT:    retl ## encoding: [0xc3]
1766;
1767; FMA64-LABEL: test_v8f64:
1768; FMA64:       ## %bb.0: ## %entry
1769; FMA64-NEXT:    vfmadd213pd %ymm4, %ymm2, %ymm0 ## encoding: [0xc4,0xe2,0xed,0xa8,0xc4]
1770; FMA64-NEXT:    ## ymm0 = (ymm2 * ymm0) + ymm4
1771; FMA64-NEXT:    vfmadd213pd %ymm5, %ymm3, %ymm1 ## encoding: [0xc4,0xe2,0xe5,0xa8,0xcd]
1772; FMA64-NEXT:    ## ymm1 = (ymm3 * ymm1) + ymm5
1773; FMA64-NEXT:    retq ## encoding: [0xc3]
1774;
1775; FMACALL64-LABEL: test_v8f64:
1776; FMACALL64:       ## %bb.0: ## %entry
1777; FMACALL64-NEXT:    subq $152, %rsp ## encoding: [0x48,0x81,0xec,0x98,0x00,0x00,0x00]
1778; FMACALL64-NEXT:    movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1779; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x7c,0x24,0x70]
1780; FMACALL64-NEXT:    movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1781; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x74,0x24,0x20]
1782; FMACALL64-NEXT:    movaps %xmm5, (%rsp) ## 16-byte Spill
1783; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x2c,0x24]
1784; FMACALL64-NEXT:    movaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1785; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x64,0x24,0x10]
1786; FMACALL64-NEXT:    movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1787; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x5c,0x24,0x60]
1788; FMACALL64-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1789; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x54,0x24,0x50]
1790; FMACALL64-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1791; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x4c,0x24,0x40]
1792; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1793; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x84,0x24,0x80,0x00,0x00,0x00]
1794; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xa0,0x00,0x00,0x00]
1795; FMACALL64-NEXT:    movaps %xmm4, %xmm1 ## encoding: [0x0f,0x28,0xcc]
1796; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1797; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1798; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1799; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x30]
1800; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1801; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x84,0x24,0x80,0x00,0x00,0x00]
1802; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
1803; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
1804; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1805; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
1806; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
1807; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
1808; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xa0,0x00,0x00,0x00]
1809; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
1810; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
1811; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1812; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1813; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1814; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x30]
1815; FMACALL64-NEXT:    movlhps %xmm0, %xmm1 ## encoding: [0x0f,0x16,0xc8]
1816; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0]
1817; FMACALL64-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1818; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x4c,0x24,0x30]
1819; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1820; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x40]
1821; FMACALL64-NEXT:    movaps (%rsp), %xmm1 ## 16-byte Reload
1822; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x0c,0x24]
1823; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xb0,0x00,0x00,0x00]
1824; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1825; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1826; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1827; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x10]
1828; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1829; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x40]
1830; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
1831; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
1832; FMACALL64-NEXT:    movaps (%rsp), %xmm1 ## 16-byte Reload
1833; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x0c,0x24]
1834; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
1835; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
1836; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xb0,0x00,0x00,0x00]
1837; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
1838; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
1839; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1840; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1841; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1842; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
1843; FMACALL64-NEXT:    movlhps %xmm0, %xmm1 ## encoding: [0x0f,0x16,0xc8]
1844; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0]
1845; FMACALL64-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1846; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x4c,0x24,0x10]
1847; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1848; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x50]
1849; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1850; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x20]
1851; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xc0,0x00,0x00,0x00]
1852; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1853; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1854; FMACALL64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
1855; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x04,0x24]
1856; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1857; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x50]
1858; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
1859; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
1860; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1861; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x20]
1862; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
1863; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
1864; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xc0,0x00,0x00,0x00]
1865; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
1866; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
1867; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1868; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1869; FMACALL64-NEXT:    movaps (%rsp), %xmm1 ## 16-byte Reload
1870; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x0c,0x24]
1871; FMACALL64-NEXT:    movlhps %xmm0, %xmm1 ## encoding: [0x0f,0x16,0xc8]
1872; FMACALL64-NEXT:    ## xmm1 = xmm1[0],xmm0[0]
1873; FMACALL64-NEXT:    movaps %xmm1, (%rsp) ## 16-byte Spill
1874; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x0c,0x24]
1875; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1876; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x60]
1877; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1878; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x70]
1879; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xd0,0x00,0x00,0x00]
1880; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1881; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1882; FMACALL64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
1883; FMACALL64-NEXT:    ## encoding: [0x0f,0x29,0x44,0x24,0x20]
1884; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1885; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x60]
1886; FMACALL64-NEXT:    movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0]
1887; FMACALL64-NEXT:    ## xmm0 = xmm0[1,1]
1888; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1889; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x70]
1890; FMACALL64-NEXT:    movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9]
1891; FMACALL64-NEXT:    ## xmm1 = xmm1[1,1]
1892; FMACALL64-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 ## encoding: [0x0f,0x28,0x94,0x24,0xd0,0x00,0x00,0x00]
1893; FMACALL64-NEXT:    movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2]
1894; FMACALL64-NEXT:    ## xmm2 = xmm2[1,1]
1895; FMACALL64-NEXT:    callq _fma ## encoding: [0xe8,A,A,A,A]
1896; FMACALL64-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel
1897; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 ## 16-byte Reload
1898; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x5c,0x24,0x20]
1899; FMACALL64-NEXT:    movlhps %xmm0, %xmm3 ## encoding: [0x0f,0x16,0xd8]
1900; FMACALL64-NEXT:    ## xmm3 = xmm3[0],xmm0[0]
1901; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
1902; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x44,0x24,0x30]
1903; FMACALL64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
1904; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x4c,0x24,0x10]
1905; FMACALL64-NEXT:    movaps (%rsp), %xmm2 ## 16-byte Reload
1906; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x14,0x24]
1907; FMACALL64-NEXT:    addq $152, %rsp ## encoding: [0x48,0x81,0xc4,0x98,0x00,0x00,0x00]
1908; FMACALL64-NEXT:    retq ## encoding: [0xc3]
1909;
1910; AVX512-LABEL: test_v8f64:
1911; AVX512:       ## %bb.0: ## %entry
1912; AVX512-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa8,0xc2]
1913; AVX512-NEXT:    ## zmm0 = (zmm1 * zmm0) + zmm2
1914; AVX512-NEXT:    retq ## encoding: [0xc3]
1915;
1916; AVX512VL-LABEL: test_v8f64:
1917; AVX512VL:       ## %bb.0: ## %entry
1918; AVX512VL-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa8,0xc2]
1919; AVX512VL-NEXT:    ## zmm0 = (zmm1 * zmm0) + zmm2
1920; AVX512VL-NEXT:    retq ## encoding: [0xc3]
1921;
1922; FMACALL32_BDVER2-LABEL: test_v8f64:
1923; FMACALL32_BDVER2:       ## %bb.0: ## %entry
1924; FMACALL32_BDVER2-NEXT:    pushl %ebp ## encoding: [0x55]
1925; FMACALL32_BDVER2-NEXT:    movl %esp, %ebp ## encoding: [0x89,0xe5]
1926; FMACALL32_BDVER2-NEXT:    andl $-32, %esp ## encoding: [0x83,0xe4,0xe0]
1927; FMACALL32_BDVER2-NEXT:    subl $352, %esp ## encoding: [0x81,0xec,0x60,0x01,0x00,0x00]
1928; FMACALL32_BDVER2-NEXT:    ## imm = 0x160
1929; FMACALL32_BDVER2-NEXT:    vmovaps %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
1930; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x29,0x84,0x24,0xe0,0x00,0x00,0x00]
1931; FMACALL32_BDVER2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
1932; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfb,0x10,0x45,0x38]
1933; FMACALL32_BDVER2-NEXT:    vmovaps %ymm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
1934; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x29,0x94,0x24,0x00,0x01,0x00,0x00]
1935; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm3, %xmm2 ## encoding: [0xc4,0xe3,0x7d,0x19,0xda,0x01]
1936; FMACALL32_BDVER2-NEXT:    vmovaps %ymm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
1937; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x29,0x9c,0x24,0xc0,0x00,0x00,0x00]
1938; FMACALL32_BDVER2-NEXT:    vmovaps %ymm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill
1939; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x29,0x8c,0x24,0xa0,0x00,0x00,0x00]
1940; FMACALL32_BDVER2-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1941; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x94,0x24,0x30,0x01,0x00,0x00]
1942; FMACALL32_BDVER2-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfb,0x11,0x44,0x24,0x10]
1943; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm1, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc8,0x01]
1944; FMACALL32_BDVER2-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1945; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x84,0x24,0x40,0x01,0x00,0x00]
1946; FMACALL32_BDVER2-NEXT:    vmovlhps %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x16,0xc2]
1947; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0],xmm2[0]
1948; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
1949; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1950; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
1951; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
1952; FMACALL32_BDVER2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
1953; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfb,0x10,0x45,0x30]
1954; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1955; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x94,0x00,0x00,0x00]
1956; FMACALL32_BDVER2-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfb,0x11,0x44,0x24,0x10]
1957; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1958; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xa0,0x00,0x00,0x00]
1959; FMACALL32_BDVER2-NEXT:    vunpckhpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload
1960; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf9,0x15,0x84,0x24,0xc0,0x00,0x00,0x00]
1961; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[1],mem[1]
1962; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
1963; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1964; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
1965; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
1966; FMACALL32_BDVER2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
1967; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfb,0x10,0x45,0x28]
1968; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1969; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0x88,0x00,0x00,0x00]
1970; FMACALL32_BDVER2-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfb,0x11,0x44,0x24,0x10]
1971; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1972; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xa0,0x00,0x00,0x00]
1973; FMACALL32_BDVER2-NEXT:    vunpcklpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload
1974; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf9,0x14,0x84,0x24,0xc0,0x00,0x00,0x00]
1975; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0],mem[0]
1976; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
1977; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1978; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
1979; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
1980; FMACALL32_BDVER2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
1981; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfb,0x10,0x45,0x20]
1982; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
1983; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0xc0,0x00,0x00,0x00]
1984; FMACALL32_BDVER2-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfb,0x11,0x44,0x24,0x10]
1985; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1986; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x00,0x01,0x00,0x00]
1987; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm0, %xmm1 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01]
1988; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
1989; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xe0,0x00,0x00,0x00]
1990; FMACALL32_BDVER2-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1991; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x4c,0x24,0x20]
1992; FMACALL32_BDVER2-NEXT:    vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
1993; FMACALL32_BDVER2-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
1994; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x30]
1995; FMACALL32_BDVER2-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x15,0xc1]
1996; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[1],xmm1[1]
1997; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
1998; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
1999; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
2000; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
2001; FMACALL32_BDVER2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2002; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfb,0x10,0x45,0x18]
2003; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
2004; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xbc,0x24,0xa0,0x00,0x00,0x00]
2005; FMACALL32_BDVER2-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfb,0x11,0x44,0x24,0x10]
2006; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
2007; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x30]
2008; FMACALL32_BDVER2-NEXT:    vunpcklpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload
2009; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf9,0x14,0x44,0x24,0x20]
2010; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0],mem[0]
2011; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
2012; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
2013; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
2014; FMACALL32_BDVER2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2015; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfb,0x10,0x45,0x10]
2016; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
2017; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x7c,0x24,0x30]
2018; FMACALL32_BDVER2-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfb,0x11,0x44,0x24,0x10]
2019; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
2020; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xe0,0x00,0x00,0x00]
2021; FMACALL32_BDVER2-NEXT:    vunpckhpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload
2022; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf9,0x15,0x84,0x24,0x00,0x01,0x00,0x00]
2023; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[1],mem[1]
2024; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
2025; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
2026; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
2027; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
2028; FMACALL32_BDVER2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2029; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfb,0x10,0x45,0x08]
2030; FMACALL32_BDVER2-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill
2031; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x7c,0x24,0x20]
2032; FMACALL32_BDVER2-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfb,0x11,0x44,0x24,0x10]
2033; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload
2034; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xe0,0x00,0x00,0x00]
2035; FMACALL32_BDVER2-NEXT:    vunpcklpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload
2036; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf9,0x14,0x84,0x24,0x00,0x01,0x00,0x00]
2037; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0],mem[0]
2038; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
2039; FMACALL32_BDVER2-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
2040; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
2041; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
2042; FMACALL32_BDVER2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2043; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfb,0x10,0x45,0x40]
2044; FMACALL32_BDVER2-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfb,0x11,0x44,0x24,0x10]
2045; FMACALL32_BDVER2-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
2046; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0x30,0x01,0x00,0x00]
2047; FMACALL32_BDVER2-NEXT:    vmovlps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload
2048; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xf8,0x12,0x84,0x24,0x48,0x01,0x00,0x00]
2049; FMACALL32_BDVER2-NEXT:    ## xmm0 = mem[0,1],xmm0[2,3]
2050; FMACALL32_BDVER2-NEXT:    vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24]
2051; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x60]
2052; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
2053; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x6c,0x24,0x20]
2054; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x58]
2055; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
2056; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0x6c,0x24,0x30]
2057; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x50]
2058; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
2059; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0xa0,0x00,0x00,0x00]
2060; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x48]
2061; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
2062; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0xc0,0x00,0x00,0x00]
2063; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x9c,0x24,0x80,0x00,0x00,0x00]
2064; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
2065; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x88,0x00,0x00,0x00]
2066; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x78]
2067; FMACALL32_BDVER2-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload
2068; FMACALL32_BDVER2-NEXT:    ## encoding: [0xdb,0xac,0x24,0x94,0x00,0x00,0x00]
2069; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x70]
2070; FMACALL32_BDVER2-NEXT:    calll _fma ## encoding: [0xe8,A,A,A,A]
2071; FMACALL32_BDVER2-NEXT:    ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4
2072; FMACALL32_BDVER2-NEXT:    fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x68]
2073; FMACALL32_BDVER2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
2074; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfb,0x10,0x44,0x24,0x60]
2075; FMACALL32_BDVER2-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
2076; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x50]
2077; FMACALL32_BDVER2-NEXT:    vmovhps {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x16,0x44,0x24,0x58]
2078; FMACALL32_BDVER2-NEXT:    ## xmm0 = xmm0[0,1],mem[0,1]
2079; FMACALL32_BDVER2-NEXT:    vmovhps {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x16,0x4c,0x24,0x48]
2080; FMACALL32_BDVER2-NEXT:    ## xmm1 = xmm1[0,1],mem[0,1]
2081; FMACALL32_BDVER2-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
2082; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfb,0x10,0x54,0x24,0x70]
2083; FMACALL32_BDVER2-NEXT:    vmovhps {{[0-9]+}}(%esp), %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x16,0x54,0x24,0x68]
2084; FMACALL32_BDVER2-NEXT:    ## xmm2 = xmm2[0,1],mem[0,1]
2085; FMACALL32_BDVER2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
2086; FMACALL32_BDVER2-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
2087; FMACALL32_BDVER2-NEXT:    ## encoding: [0xc5,0xfb,0x10,0x8c,0x24,0x80,0x00,0x00,0x00]
2088; FMACALL32_BDVER2-NEXT:    vmovhps {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x16,0x4c,0x24,0x78]
2089; FMACALL32_BDVER2-NEXT:    ## xmm1 = xmm1[0,1],mem[0,1]
2090; FMACALL32_BDVER2-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1 ## encoding: [0xc4,0xe3,0x75,0x18,0xca,0x01]
2091; FMACALL32_BDVER2-NEXT:    movl %ebp, %esp ## encoding: [0x89,0xec]
2092; FMACALL32_BDVER2-NEXT:    popl %ebp ## encoding: [0x5d]
2093; FMACALL32_BDVER2-NEXT:    retl ## encoding: [0xc3]
2094entry:
2095  %call = call <8 x double> @llvm.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c)
2096  ret <8 x double> %call
2097}
2098
; Scalar float constant folding: llvm.fma.f32 is called with three literal
; operands (50.0, 20.0, 20.0), so the result 50*20+20 = 1020 is known at
; compile time. Every expected sequence below is just a constant load followed
; by a return; none of them contains an fma instruction or a call to _fma.
; The 64-bit configurations show the folded value (1.02E+3) in the asm comment;
; the 32-bit configurations return it on the x87 stack via flds from a
; constant-pool label.
2099define float @constant_fold_f32() {
2100; FMA32-LABEL: constant_fold_f32:
2101; FMA32:       ## %bb.0:
2102; FMA32-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}} ## encoding: [0xd9,0x05,A,A,A,A]
2103; FMA32-NEXT:    ## fixup A - offset: 2, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
2104; FMA32-NEXT:    retl ## encoding: [0xc3]
2105;
2106; FMACALL32-LABEL: constant_fold_f32:
2107; FMACALL32:       ## %bb.0:
2108; FMACALL32-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}} ## encoding: [0xd9,0x05,A,A,A,A]
2109; FMACALL32-NEXT:    ## fixup A - offset: 2, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
2110; FMACALL32-NEXT:    retl ## encoding: [0xc3]
2111;
2112; FMA64-LABEL: constant_fold_f32:
2113; FMA64:       ## %bb.0:
2114; FMA64-NEXT:    vmovss {{.*#+}} xmm0 = [1.02E+3,0.0E+0,0.0E+0,0.0E+0]
2115; FMA64-NEXT:    ## encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A]
2116; FMA64-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
2117; FMA64-NEXT:    retq ## encoding: [0xc3]
2118;
2119; FMACALL64-LABEL: constant_fold_f32:
2120; FMACALL64:       ## %bb.0:
2121; FMACALL64-NEXT:    movss {{.*#+}} xmm0 = [1.02E+3,0.0E+0,0.0E+0,0.0E+0]
2122; FMACALL64-NEXT:    ## encoding: [0xf3,0x0f,0x10,0x05,A,A,A,A]
2123; FMACALL64-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
2124; FMACALL64-NEXT:    retq ## encoding: [0xc3]
2125;
2126; AVX512-LABEL: constant_fold_f32:
2127; AVX512:       ## %bb.0:
2128; AVX512-NEXT:    vmovss {{.*#+}} xmm0 = [1.02E+3,0.0E+0,0.0E+0,0.0E+0]
2129; AVX512-NEXT:    ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A]
2130; AVX512-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
2131; AVX512-NEXT:    retq ## encoding: [0xc3]
2132;
2133; AVX512VL-LABEL: constant_fold_f32:
2134; AVX512VL:       ## %bb.0:
2135; AVX512VL-NEXT:    vmovss {{.*#+}} xmm0 = [1.02E+3,0.0E+0,0.0E+0,0.0E+0]
2136; AVX512VL-NEXT:    ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A]
2137; AVX512VL-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
2138; AVX512VL-NEXT:    retq ## encoding: [0xc3]
2139  %r  = call float @llvm.fma.f32(float 5.000000e+01, float 2.000000e+01, float 2.000000e+01)
2140  ret float %r
2141}
2142
; Vector <4 x float> constant folding: all three llvm.fma.v4f32 operands are
; literal vectors, so each lane a*b+c is known at compile time:
;   lane 0:  0*40 +   0 =    0
;   lane 1: 10*50 + -10 =  490
;   lane 2: 20*60 + -20 = 1180
;   lane 3: 30*70 + -30 = 2070
; Every configuration is expected to load that folded vector from the constant
; pool with a single (v)movaps and return; no fma instruction or _fma call
; appears in any expected sequence.
2143define <4 x float> @constant_fold_v4f32() {
2144; FMA32-LABEL: constant_fold_v4f32:
2145; FMA32:       ## %bb.0:
2146; FMA32-NEXT:    vmovaps {{.*#+}} xmm0 = [0.0E+0,4.9E+2,1.18E+3,2.07E+3]
2147; FMA32-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
2148; FMA32-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
2149; FMA32-NEXT:    retl ## encoding: [0xc3]
2150;
2151; FMACALL32-LABEL: constant_fold_v4f32:
2152; FMACALL32:       ## %bb.0:
2153; FMACALL32-NEXT:    vmovaps {{.*#+}} xmm0 = [0.0E+0,4.9E+2,1.18E+3,2.07E+3]
2154; FMACALL32-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
2155; FMACALL32-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
2156; FMACALL32-NEXT:    retl ## encoding: [0xc3]
2157;
2158; FMA64-LABEL: constant_fold_v4f32:
2159; FMA64:       ## %bb.0:
2160; FMA64-NEXT:    vmovaps {{.*#+}} xmm0 = [0.0E+0,4.9E+2,1.18E+3,2.07E+3]
2161; FMA64-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
2162; FMA64-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
2163; FMA64-NEXT:    retq ## encoding: [0xc3]
2164;
2165; FMACALL64-LABEL: constant_fold_v4f32:
2166; FMACALL64:       ## %bb.0:
2167; FMACALL64-NEXT:    movaps {{.*#+}} xmm0 = [0.0E+0,4.9E+2,1.18E+3,2.07E+3]
2168; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
2169; FMACALL64-NEXT:    ## fixup A - offset: 3, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
2170; FMACALL64-NEXT:    retq ## encoding: [0xc3]
2171;
2172; AVX512-LABEL: constant_fold_v4f32:
2173; AVX512:       ## %bb.0:
2174; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [0.0E+0,4.9E+2,1.18E+3,2.07E+3]
2175; AVX512-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
2176; AVX512-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
2177; AVX512-NEXT:    retq ## encoding: [0xc3]
2178;
2179; AVX512VL-LABEL: constant_fold_v4f32:
2180; AVX512VL:       ## %bb.0:
2181; AVX512VL-NEXT:    vmovaps {{.*#+}} xmm0 = [0.0E+0,4.9E+2,1.18E+3,2.07E+3]
2182; AVX512VL-NEXT:    ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
2183; AVX512VL-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
2184; AVX512VL-NEXT:    retq ## encoding: [0xc3]
2185  %r  = call <4 x float> @llvm.fma.v4f32(<4 x float> <float 0.000000e+01, float 1.000000e+01, float 2.000000e+01, float 3.000000e+01>, <4 x float> <float 4.000000e+01, float 5.000000e+01, float 6.000000e+01, float 7.000000e+01>, <4 x float> <float 0.000000e+01, float -1.000000e+01, float -2.000000e+01, float -3.000000e+01>)
2186  ret <4 x float> %r
2187}
2188
; Vector <2 x double> constant folding: all three llvm.fma.v2f64 operands are
; literal vectors, so each lane a*b+c is known at compile time:
;   lane 0: 10*40 + 10 =  410
;   lane 1: 20*70 +  0 = 1400
; Every configuration is expected to load that folded vector from the constant
; pool with a single (v)movaps and return; no fma instruction or _fma call
; appears in any expected sequence.
2189define <2 x double> @constant_fold_v2f64() {
2190; FMA32-LABEL: constant_fold_v2f64:
2191; FMA32:       ## %bb.0:
2192; FMA32-NEXT:    vmovaps {{.*#+}} xmm0 = [4.1E+2,1.4E+3]
2193; FMA32-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
2194; FMA32-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
2195; FMA32-NEXT:    retl ## encoding: [0xc3]
2196;
2197; FMACALL32-LABEL: constant_fold_v2f64:
2198; FMACALL32:       ## %bb.0:
2199; FMACALL32-NEXT:    vmovaps {{.*#+}} xmm0 = [4.1E+2,1.4E+3]
2200; FMACALL32-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
2201; FMACALL32-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
2202; FMACALL32-NEXT:    retl ## encoding: [0xc3]
2203;
2204; FMA64-LABEL: constant_fold_v2f64:
2205; FMA64:       ## %bb.0:
2206; FMA64-NEXT:    vmovaps {{.*#+}} xmm0 = [4.1E+2,1.4E+3]
2207; FMA64-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
2208; FMA64-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
2209; FMA64-NEXT:    retq ## encoding: [0xc3]
2210;
2211; FMACALL64-LABEL: constant_fold_v2f64:
2212; FMACALL64:       ## %bb.0:
2213; FMACALL64-NEXT:    movaps {{.*#+}} xmm0 = [4.1E+2,1.4E+3]
2214; FMACALL64-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
2215; FMACALL64-NEXT:    ## fixup A - offset: 3, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
2216; FMACALL64-NEXT:    retq ## encoding: [0xc3]
2217;
2218; AVX512-LABEL: constant_fold_v2f64:
2219; AVX512:       ## %bb.0:
2220; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [4.1E+2,1.4E+3]
2221; AVX512-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
2222; AVX512-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
2223; AVX512-NEXT:    retq ## encoding: [0xc3]
2224;
2225; AVX512VL-LABEL: constant_fold_v2f64:
2226; AVX512VL:       ## %bb.0:
2227; AVX512VL-NEXT:    vmovaps {{.*#+}} xmm0 = [4.1E+2,1.4E+3]
2228; AVX512VL-NEXT:    ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
2229; AVX512VL-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
2230; AVX512VL-NEXT:    retq ## encoding: [0xc3]
2231  %r  = call <2 x double> @llvm.fma.v2f64(<2 x double> <double 1.000000e+01, double 2.000000e+01>, <2 x double> <double 4.000000e+01, double 7.000000e+01>, <2 x double> <double 1.000000e+01, double 0.000000e+01>)
2232  ret <2 x double> %r
2233}
2234
; Declarations of the llvm.fma intrinsic overloads called by the tests in this
; file. Scalar overloads: float, double and x86_fp80.
2235declare float @llvm.fma.f32(float, float, float)
2236declare double @llvm.fma.f64(double, double, double)
2237declare x86_fp80 @llvm.fma.f80(x86_fp80, x86_fp80, x86_fp80)
2238
; Single-precision vector overloads: 4, 8 and 16 lanes.
2239declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
2240declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
2241declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>)
2242
; Double-precision vector overloads: 2, 4 and 8 lanes.
2243declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
2244declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
2245declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>)
2246
; Attribute group #0 (nounwind), referenced by test functions such as test_f32
; at the top of the file.
2247attributes #0 = { nounwind }
2248