xref: /llvm-project/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll (revision 809c5ac3b0d78f504d93717ac4c0a02816cf47bb)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s
3; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
4; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx512f < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
5; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx512dq < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512DQ
6
; Strict fdiv of a 1-element float vector (1.0 / 10.0) with dynamic rounding;
; expected to lower to a single scalar divss (vdivss under AVX).
7define <1 x float> @constrained_vector_fdiv_v1f32() #0 {
8; CHECK-LABEL: constrained_vector_fdiv_v1f32:
9; CHECK:       # %bb.0: # %entry
10; CHECK-NEXT:    movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
11; CHECK-NEXT:    divss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12; CHECK-NEXT:    retq
13;
14; AVX-LABEL: constrained_vector_fdiv_v1f32:
15; AVX:       # %bb.0: # %entry
16; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
17; AVX-NEXT:    vdivss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18; AVX-NEXT:    retq
19entry:
20  %div = call <1 x float> @llvm.experimental.constrained.fdiv.v1f32(
21           <1 x float> <float 1.000000e+00>,
22           <1 x float> <float 1.000000e+01>,
23           metadata !"round.dynamic",
24           metadata !"fpexcept.strict") #0
25  ret <1 x float> %div
26}
27
; Strict fdiv of <2 x double>; the full vector fits a 128-bit register, so a
; single packed divpd/vdivpd is expected rather than scalarization.
28define <2 x double> @constrained_vector_fdiv_v2f64() #0 {
29; CHECK-LABEL: constrained_vector_fdiv_v2f64:
30; CHECK:       # %bb.0: # %entry
31; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0]
32; CHECK-NEXT:    divpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
33; CHECK-NEXT:    retq
34;
35; AVX-LABEL: constrained_vector_fdiv_v2f64:
36; AVX:       # %bb.0: # %entry
37; AVX-NEXT:    vmovapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0]
38; AVX-NEXT:    vdivpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
39; AVX-NEXT:    retq
40entry:
41  %div = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(
42           <2 x double> <double 1.000000e+00, double 2.000000e+00>,
43           <2 x double> <double 1.000000e+01, double 1.000000e+01>,
44           metadata !"round.dynamic",
45           metadata !"fpexcept.strict") #0
46  ret <2 x double> %div
47}
48
; Strict fdiv of the odd-width <3 x float>: each lane is scalarized into a
; separate divss/vdivss and the results are re-packed with shuffles/inserts.
49define <3 x float> @constrained_vector_fdiv_v3f32() #0 {
50; CHECK-LABEL: constrained_vector_fdiv_v3f32:
51; CHECK:       # %bb.0: # %entry
52; CHECK-NEXT:    movss {{.*#+}} xmm1 = [1.0E+1,0.0E+0,0.0E+0,0.0E+0]
53; CHECK-NEXT:    movss {{.*#+}} xmm2 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
54; CHECK-NEXT:    divss %xmm1, %xmm2
55; CHECK-NEXT:    movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
56; CHECK-NEXT:    divss %xmm1, %xmm0
57; CHECK-NEXT:    movss {{.*#+}} xmm3 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
58; CHECK-NEXT:    divss %xmm1, %xmm3
59; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
60; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
61; CHECK-NEXT:    retq
62;
63; AVX-LABEL: constrained_vector_fdiv_v3f32:
64; AVX:       # %bb.0: # %entry
65; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [1.0E+1,0.0E+0,0.0E+0,0.0E+0]
66; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
67; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm1
68; AVX-NEXT:    vmovss {{.*#+}} xmm2 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
69; AVX-NEXT:    vdivss %xmm0, %xmm2, %xmm2
70; AVX-NEXT:    vmovss {{.*#+}} xmm3 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
71; AVX-NEXT:    vdivss %xmm0, %xmm3, %xmm0
72; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3]
73; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
74; AVX-NEXT:    retq
75entry:
76  %div = call <3 x float> @llvm.experimental.constrained.fdiv.v3f32(
77           <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>,
78           <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>,
79           metadata !"round.dynamic",
80           metadata !"fpexcept.strict") #0
81  ret <3 x float> %div
82}
83
; Strict fdiv of <3 x double>: lowered as one packed divpd for lanes 0-1 plus a
; scalar divsd for lane 2 (returned via x87 st0 in the SSE ABI; one ymm on AVX).
84define <3 x double> @constrained_vector_fdiv_v3f64() #0 {
85; CHECK-LABEL: constrained_vector_fdiv_v3f64:
86; CHECK:       # %bb.0: # %entry
87; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0]
88; CHECK-NEXT:    divpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
89; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0]
90; CHECK-NEXT:    divsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
91; CHECK-NEXT:    movsd %xmm1, -{{[0-9]+}}(%rsp)
92; CHECK-NEXT:    movapd %xmm0, %xmm1
93; CHECK-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
94; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
95; CHECK-NEXT:    wait
96; CHECK-NEXT:    retq
97;
98; AVX-LABEL: constrained_vector_fdiv_v3f64:
99; AVX:       # %bb.0: # %entry
100; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
101; AVX-NEXT:    vdivsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
102; AVX-NEXT:    vmovapd {{.*#+}} xmm1 = [1.0E+0,2.0E+0]
103; AVX-NEXT:    vdivpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
104; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
105; AVX-NEXT:    retq
106entry:
107  %div = call <3 x double> @llvm.experimental.constrained.fdiv.v3f64(
108           <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>,
109           <3 x double> <double 1.000000e+01, double 1.000000e+01, double 1.000000e+01>,
110           metadata !"round.dynamic",
111           metadata !"fpexcept.strict") #0
112  ret <3 x double> %div
113}
114
; Strict fdiv of <4 x double>: two 128-bit divpd ops on SSE, one 256-bit vdivpd
; on AVX (AVX512 materializes the splat divisor with vbroadcastsd first).
115define <4 x double> @constrained_vector_fdiv_v4f64() #0 {
116; CHECK-LABEL: constrained_vector_fdiv_v4f64:
117; CHECK:       # %bb.0: # %entry
118; CHECK-NEXT:    movapd {{.*#+}} xmm2 = [1.0E+1,1.0E+1]
119; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [3.0E+0,4.0E+0]
120; CHECK-NEXT:    divpd %xmm2, %xmm1
121; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0]
122; CHECK-NEXT:    divpd %xmm2, %xmm0
123; CHECK-NEXT:    retq
124;
125; AVX1-LABEL: constrained_vector_fdiv_v4f64:
126; AVX1:       # %bb.0: # %entry
127; AVX1-NEXT:    vmovapd {{.*#+}} ymm0 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
128; AVX1-NEXT:    vdivpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
129; AVX1-NEXT:    retq
130;
131; AVX512-LABEL: constrained_vector_fdiv_v4f64:
132; AVX512:       # %bb.0: # %entry
133; AVX512-NEXT:    vbroadcastsd {{.*#+}} ymm0 = [1.0E+1,1.0E+1,1.0E+1,1.0E+1]
134; AVX512-NEXT:    vmovapd {{.*#+}} ymm1 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
135; AVX512-NEXT:    vdivpd %ymm0, %ymm1, %ymm0
136; AVX512-NEXT:    retq
137entry:
138  %div = call <4 x double> @llvm.experimental.constrained.fdiv.v4f64(
139           <4 x double> <double 1.000000e+00, double 2.000000e+00,
140                         double 3.000000e+00, double 4.000000e+00>,
141           <4 x double> <double 1.000000e+01, double 1.000000e+01,
142                         double 1.000000e+01, double 1.000000e+01>,
143           metadata !"round.dynamic",
144           metadata !"fpexcept.strict") #0
145  ret <4 x double> %div
146}
147
; Strict frem of <1 x float>: x86 has no frem instruction, so this must lower
; to a libcall to fmodf (stack realigned with a push/pop around the call).
148define <1 x float> @constrained_vector_frem_v1f32() #0 {
149; CHECK-LABEL: constrained_vector_frem_v1f32:
150; CHECK:       # %bb.0: # %entry
151; CHECK-NEXT:    pushq %rax
152; CHECK-NEXT:    .cfi_def_cfa_offset 16
153; CHECK-NEXT:    movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
154; CHECK-NEXT:    movss {{.*#+}} xmm1 = [1.0E+1,0.0E+0,0.0E+0,0.0E+0]
155; CHECK-NEXT:    callq fmodf@PLT
156; CHECK-NEXT:    popq %rax
157; CHECK-NEXT:    .cfi_def_cfa_offset 8
158; CHECK-NEXT:    retq
159;
160; AVX-LABEL: constrained_vector_frem_v1f32:
161; AVX:       # %bb.0: # %entry
162; AVX-NEXT:    pushq %rax
163; AVX-NEXT:    .cfi_def_cfa_offset 16
164; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
165; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [1.0E+1,0.0E+0,0.0E+0,0.0E+0]
166; AVX-NEXT:    callq fmodf@PLT
167; AVX-NEXT:    popq %rax
168; AVX-NEXT:    .cfi_def_cfa_offset 8
169; AVX-NEXT:    retq
170entry:
171  %rem = call <1 x float> @llvm.experimental.constrained.frem.v1f32(
172           <1 x float> <float 1.000000e+00>,
173           <1 x float> <float 1.000000e+01>,
174           metadata !"round.dynamic",
175           metadata !"fpexcept.strict") #0
176  ret <1 x float> %rem
177}
178
; Strict frem of <2 x double>: scalarized into two fmod libcalls with the first
; result spilled across the second call, then repacked with unpcklpd.
179define <2 x double> @constrained_vector_frem_v2f64() #0 {
180; CHECK-LABEL: constrained_vector_frem_v2f64:
181; CHECK:       # %bb.0: # %entry
182; CHECK-NEXT:    subq $24, %rsp
183; CHECK-NEXT:    .cfi_def_cfa_offset 32
184; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [2.0E+0,0.0E+0]
185; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0]
186; CHECK-NEXT:    callq fmod@PLT
187; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
188; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
189; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0]
190; CHECK-NEXT:    callq fmod@PLT
191; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
192; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
193; CHECK-NEXT:    addq $24, %rsp
194; CHECK-NEXT:    .cfi_def_cfa_offset 8
195; CHECK-NEXT:    retq
196;
197; AVX-LABEL: constrained_vector_frem_v2f64:
198; AVX:       # %bb.0: # %entry
199; AVX-NEXT:    subq $24, %rsp
200; AVX-NEXT:    .cfi_def_cfa_offset 32
201; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [2.0E+0,0.0E+0]
202; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0]
203; AVX-NEXT:    callq fmod@PLT
204; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
205; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
206; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0]
207; AVX-NEXT:    callq fmod@PLT
208; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
209; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
210; AVX-NEXT:    addq $24, %rsp
211; AVX-NEXT:    .cfi_def_cfa_offset 8
212; AVX-NEXT:    retq
213entry:
214  %rem = call <2 x double> @llvm.experimental.constrained.frem.v2f64(
215           <2 x double> <double 1.000000e+00, double 2.000000e+00>,
216           <2 x double> <double 1.000000e+01, double 1.000000e+01>,
217           metadata !"round.dynamic",
218           metadata !"fpexcept.strict") #0
219  ret <2 x double> %rem
220}
221
; Strict frem of <3 x float>: three fmodf libcalls with spills between them,
; results re-packed lane by lane (unpcklps/movlhps, or vinsertps under AVX).
222define <3 x float> @constrained_vector_frem_v3f32() #0 {
223; CHECK-LABEL: constrained_vector_frem_v3f32:
224; CHECK:       # %bb.0: # %entry
225; CHECK-NEXT:    subq $40, %rsp
226; CHECK-NEXT:    .cfi_def_cfa_offset 48
227; CHECK-NEXT:    movss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
228; CHECK-NEXT:    movss {{.*#+}} xmm1 = [1.0E+1,0.0E+0,0.0E+0,0.0E+0]
229; CHECK-NEXT:    callq fmodf@PLT
230; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
231; CHECK-NEXT:    movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
232; CHECK-NEXT:    movss {{.*#+}} xmm1 = [1.0E+1,0.0E+0,0.0E+0,0.0E+0]
233; CHECK-NEXT:    callq fmodf@PLT
234; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
235; CHECK-NEXT:    movss {{.*#+}} xmm0 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
236; CHECK-NEXT:    movss {{.*#+}} xmm1 = [1.0E+1,0.0E+0,0.0E+0,0.0E+0]
237; CHECK-NEXT:    callq fmodf@PLT
238; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
239; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
240; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
241; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
242; CHECK-NEXT:    movaps %xmm1, %xmm0
243; CHECK-NEXT:    addq $40, %rsp
244; CHECK-NEXT:    .cfi_def_cfa_offset 8
245; CHECK-NEXT:    retq
246;
247; AVX-LABEL: constrained_vector_frem_v3f32:
248; AVX:       # %bb.0: # %entry
249; AVX-NEXT:    subq $40, %rsp
250; AVX-NEXT:    .cfi_def_cfa_offset 48
251; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
252; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [1.0E+1,0.0E+0,0.0E+0,0.0E+0]
253; AVX-NEXT:    callq fmodf@PLT
254; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
255; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
256; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [1.0E+1,0.0E+0,0.0E+0,0.0E+0]
257; AVX-NEXT:    callq fmodf@PLT
258; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
259; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
260; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [1.0E+1,0.0E+0,0.0E+0,0.0E+0]
261; AVX-NEXT:    callq fmodf@PLT
262; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
263; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
264; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
265; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
266; AVX-NEXT:    addq $40, %rsp
267; AVX-NEXT:    .cfi_def_cfa_offset 8
268; AVX-NEXT:    retq
269entry:
270  %rem = call <3 x float> @llvm.experimental.constrained.frem.v3f32(
271           <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>,
272           <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>,
273           metadata !"round.dynamic",
274           metadata !"fpexcept.strict") #0
275  ret <3 x float> %rem
276}
277
; Strict frem of <3 x double>: three fmod libcalls; the SSE ABI returns lane 2
; via x87 (fldl + wait), while AVX assembles one ymm with vinsertf128 and needs
; vzeroupper before the last call.
278define <3 x double> @constrained_vector_frem_v3f64() #0 {
279; CHECK-LABEL: constrained_vector_frem_v3f64:
280; CHECK:       # %bb.0: # %entry
281; CHECK-NEXT:    subq $24, %rsp
282; CHECK-NEXT:    .cfi_def_cfa_offset 32
283; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [2.0E+0,0.0E+0]
284; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0]
285; CHECK-NEXT:    callq fmod@PLT
286; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
287; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
288; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0]
289; CHECK-NEXT:    callq fmod@PLT
290; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
291; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
292; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0]
293; CHECK-NEXT:    callq fmod@PLT
294; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
295; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
296; CHECK-NEXT:    wait
297; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
298; CHECK-NEXT:    # xmm0 = mem[0],zero
299; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
300; CHECK-NEXT:    # xmm1 = mem[0],zero
301; CHECK-NEXT:    addq $24, %rsp
302; CHECK-NEXT:    .cfi_def_cfa_offset 8
303; CHECK-NEXT:    retq
304;
305; AVX-LABEL: constrained_vector_frem_v3f64:
306; AVX:       # %bb.0: # %entry
307; AVX-NEXT:    subq $40, %rsp
308; AVX-NEXT:    .cfi_def_cfa_offset 48
309; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [2.0E+0,0.0E+0]
310; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0]
311; AVX-NEXT:    callq fmod@PLT
312; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
313; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
314; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0]
315; AVX-NEXT:    callq fmod@PLT
316; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
317; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
318; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
319; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
320; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0]
321; AVX-NEXT:    vzeroupper
322; AVX-NEXT:    callq fmod@PLT
323; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
324; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
325; AVX-NEXT:    addq $40, %rsp
326; AVX-NEXT:    .cfi_def_cfa_offset 8
327; AVX-NEXT:    retq
328entry:
329  %rem = call <3 x double> @llvm.experimental.constrained.frem.v3f64(
330           <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>,
331           <3 x double> <double 1.000000e+01, double 1.000000e+01, double 1.000000e+01>,
332           metadata !"round.dynamic",
333           metadata !"fpexcept.strict") #0
334  ret <3 x double> %rem
335}
336
; Strict frem of <4 x double>: four fmod libcalls, pairwise repacked with
; unpcklpd into two xmm halves (merged via vinsertf128 into one ymm on AVX).
; Note: this test has no explicit entry label; the body is a single block.
337define <4 x double> @constrained_vector_frem_v4f64() #0 {
338; CHECK-LABEL: constrained_vector_frem_v4f64:
339; CHECK:       # %bb.0:
340; CHECK-NEXT:    subq $40, %rsp
341; CHECK-NEXT:    .cfi_def_cfa_offset 48
342; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [2.0E+0,0.0E+0]
343; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0]
344; CHECK-NEXT:    callq fmod@PLT
345; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
346; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
347; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0]
348; CHECK-NEXT:    callq fmod@PLT
349; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
350; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
351; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
352; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.0E+0,0.0E+0]
353; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0]
354; CHECK-NEXT:    callq fmod@PLT
355; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
356; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
357; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0]
358; CHECK-NEXT:    callq fmod@PLT
359; CHECK-NEXT:    movaps %xmm0, %xmm1
360; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
361; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
362; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
363; CHECK-NEXT:    addq $40, %rsp
364; CHECK-NEXT:    .cfi_def_cfa_offset 8
365; CHECK-NEXT:    retq
366;
367; AVX-LABEL: constrained_vector_frem_v4f64:
368; AVX:       # %bb.0:
369; AVX-NEXT:    subq $40, %rsp
370; AVX-NEXT:    .cfi_def_cfa_offset 48
371; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.0E+0,0.0E+0]
372; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0]
373; AVX-NEXT:    callq fmod@PLT
374; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
375; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
376; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0]
377; AVX-NEXT:    callq fmod@PLT
378; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
379; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
380; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
381; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [2.0E+0,0.0E+0]
382; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0]
383; AVX-NEXT:    callq fmod@PLT
384; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
385; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
386; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0]
387; AVX-NEXT:    callq fmod@PLT
388; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
389; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
390; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
391; AVX-NEXT:    addq $40, %rsp
392; AVX-NEXT:    .cfi_def_cfa_offset 8
393; AVX-NEXT:    retq
394  %rem = call <4 x double> @llvm.experimental.constrained.frem.v4f64(
395           <4 x double> <double 1.000000e+00, double 2.000000e+00,
396                         double 3.000000e+00, double 4.000000e+00>,
397           <4 x double> <double 1.000000e+01, double 1.000000e+01,
398                         double 1.000000e+01, double 1.000000e+01>,
399           metadata !"round.dynamic",
400           metadata !"fpexcept.strict") #0
401  ret <4 x double> %rem
402}
403
; Strict fmul of <1 x float> with an infinity operand (0x7FF0000000000000);
; strict semantics must keep the mulss rather than constant-folding Inf * 2.
404define <1 x float> @constrained_vector_fmul_v1f32() #0 {
405; CHECK-LABEL: constrained_vector_fmul_v1f32:
406; CHECK:       # %bb.0: # %entry
407; CHECK-NEXT:    movss {{.*#+}} xmm0 = [+Inf,0.0E+0,0.0E+0,0.0E+0]
408; CHECK-NEXT:    mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
409; CHECK-NEXT:    retq
410;
411; AVX-LABEL: constrained_vector_fmul_v1f32:
412; AVX:       # %bb.0: # %entry
413; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [+Inf,0.0E+0,0.0E+0,0.0E+0]
414; AVX-NEXT:    vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
415; AVX-NEXT:    retq
416entry:
417  %mul = call <1 x float> @llvm.experimental.constrained.fmul.v1f32(
418           <1 x float> <float 0x7FF0000000000000>,
419           <1 x float> <float 2.000000e+00>,
420           metadata !"round.dynamic",
421           metadata !"fpexcept.strict") #0
422  ret <1 x float> %mul
423}
424
; Strict fmul of <2 x double> with DBL_MAX operands (would overflow); a single
; packed mulpd is expected, with the splat constant loaded via vmovddup on AVX.
425define <2 x double> @constrained_vector_fmul_v2f64() #0 {
426; CHECK-LABEL: constrained_vector_fmul_v2f64:
427; CHECK:       # %bb.0: # %entry
428; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
429; CHECK-NEXT:    mulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
430; CHECK-NEXT:    retq
431;
432; AVX-LABEL: constrained_vector_fmul_v2f64:
433; AVX:       # %bb.0: # %entry
434; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
435; AVX-NEXT:    # xmm0 = mem[0,0]
436; AVX-NEXT:    vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
437; AVX-NEXT:    retq
438entry:
439  %mul = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(
440           <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
441           <2 x double> <double 2.000000e+00, double 3.000000e+00>,
442           metadata !"round.dynamic",
443           metadata !"fpexcept.strict") #0
444  ret <2 x double> %mul
445}
446
; Strict fmul of <3 x float> by +Inf in every lane: scalarized to three
; mulss/vmulss ops, then repacked; no folding of the Inf multiplies allowed.
447define <3 x float> @constrained_vector_fmul_v3f32() #0 {
448; CHECK-LABEL: constrained_vector_fmul_v3f32:
449; CHECK:       # %bb.0: # %entry
450; CHECK-NEXT:    movss {{.*#+}} xmm1 = [+Inf,0.0E+0,0.0E+0,0.0E+0]
451; CHECK-NEXT:    movss {{.*#+}} xmm2 = [1.0E+2,0.0E+0,0.0E+0,0.0E+0]
452; CHECK-NEXT:    mulss %xmm1, %xmm2
453; CHECK-NEXT:    movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
454; CHECK-NEXT:    mulss %xmm1, %xmm0
455; CHECK-NEXT:    mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
456; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
457; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
458; CHECK-NEXT:    retq
459;
460; AVX-LABEL: constrained_vector_fmul_v3f32:
461; AVX:       # %bb.0: # %entry
462; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [+Inf,0.0E+0,0.0E+0,0.0E+0]
463; AVX-NEXT:    vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
464; AVX-NEXT:    vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
465; AVX-NEXT:    vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
466; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3]
467; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
468; AVX-NEXT:    retq
469entry:
470  %mul = call <3 x float> @llvm.experimental.constrained.fmul.v3f32(
471           <3 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000,
472                        float 0x7FF0000000000000>,
473           <3 x float> <float 1.000000e+00, float 1.000000e+01, float 1.000000e+02>,
474           metadata !"round.dynamic",
475           metadata !"fpexcept.strict") #0
476  ret <3 x float> %mul
477}
478
; Strict fmul of <3 x double> with DBL_MAX operands: packed mulpd for lanes 0-1
; plus scalar mulsd for lane 2 (x87 return on SSE, one ymm via vinsertf128 on AVX).
479define <3 x double> @constrained_vector_fmul_v3f64() #0 {
480; CHECK-LABEL: constrained_vector_fmul_v3f64:
481; CHECK:       # %bb.0: # %entry
482; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
483; CHECK-NEXT:    mulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
484; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [1.7976931348623157E+308,0.0E+0]
485; CHECK-NEXT:    mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
486; CHECK-NEXT:    movsd %xmm1, -{{[0-9]+}}(%rsp)
487; CHECK-NEXT:    movapd %xmm0, %xmm1
488; CHECK-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
489; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
490; CHECK-NEXT:    wait
491; CHECK-NEXT:    retq
492;
493; AVX-LABEL: constrained_vector_fmul_v3f64:
494; AVX:       # %bb.0: # %entry
495; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [1.7976931348623157E+308,0.0E+0]
496; AVX-NEXT:    vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
497; AVX-NEXT:    vmovddup {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308]
498; AVX-NEXT:    # xmm1 = mem[0,0]
499; AVX-NEXT:    vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
500; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
501; AVX-NEXT:    retq
502entry:
503  %mul = call <3 x double> @llvm.experimental.constrained.fmul.v3f64(
504           <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
505                         double 0x7FEFFFFFFFFFFFFF>,
506           <3 x double> <double 1.000000e+00, double 1.000000e+01, double 1.000000e+02>,
507           metadata !"round.dynamic",
508           metadata !"fpexcept.strict") #0
509  ret <3 x double> %mul
510}
511
; Strict fmul of <4 x double> with DBL_MAX operands: two mulpd on SSE, one
; 256-bit vmulpd with a vbroadcastsd splat under AVX.
512define <4 x double> @constrained_vector_fmul_v4f64() #0 {
513; CHECK-LABEL: constrained_vector_fmul_v4f64:
514; CHECK:       # %bb.0: # %entry
515; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
516; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [4.0E+0,5.0E+0]
517; CHECK-NEXT:    mulpd %xmm0, %xmm1
518; CHECK-NEXT:    mulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
519; CHECK-NEXT:    retq
520;
521; AVX-LABEL: constrained_vector_fmul_v4f64:
522; AVX:       # %bb.0: # %entry
523; AVX-NEXT:    vbroadcastsd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308]
524; AVX-NEXT:    vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
525; AVX-NEXT:    retq
526entry:
527  %mul = call <4 x double> @llvm.experimental.constrained.fmul.v4f64(
528           <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
529                         double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
530           <4 x double> <double 2.000000e+00, double 3.000000e+00,
531                         double 4.000000e+00, double 5.000000e+00>,
532           metadata !"round.dynamic",
533           metadata !"fpexcept.strict") #0
534  ret <4 x double> %mul
535}
536
; Strict fadd of <1 x float> with an infinity operand; expected to lower to a
; single scalar addss/vaddss with no constant folding of Inf + 1.0.
537define <1 x float> @constrained_vector_fadd_v1f32() #0 {
538; CHECK-LABEL: constrained_vector_fadd_v1f32:
539; CHECK:       # %bb.0: # %entry
540; CHECK-NEXT:    movss {{.*#+}} xmm0 = [+Inf,0.0E+0,0.0E+0,0.0E+0]
541; CHECK-NEXT:    addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
542; CHECK-NEXT:    retq
543;
544; AVX-LABEL: constrained_vector_fadd_v1f32:
545; AVX:       # %bb.0: # %entry
546; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [+Inf,0.0E+0,0.0E+0,0.0E+0]
547; AVX-NEXT:    vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
548; AVX-NEXT:    retq
549entry:
550  %add = call <1 x float> @llvm.experimental.constrained.fadd.v1f32(
551           <1 x float> <float 0x7FF0000000000000>,
552           <1 x float> <float 1.0>,
553           metadata !"round.dynamic",
554           metadata !"fpexcept.strict") #0
555  ret <1 x float> %add
556}
557
; Strict fadd of <2 x double> with DBL_MAX operands (inexact/overflow-prone);
; one packed addpd/vaddpd expected, splat constant via vmovddup under AVX.
558define <2 x double> @constrained_vector_fadd_v2f64() #0 {
559; CHECK-LABEL: constrained_vector_fadd_v2f64:
560; CHECK:       # %bb.0: # %entry
561; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
562; CHECK-NEXT:    addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
563; CHECK-NEXT:    retq
564;
565; AVX-LABEL: constrained_vector_fadd_v2f64:
566; AVX:       # %bb.0: # %entry
567; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
568; AVX-NEXT:    # xmm0 = mem[0,0]
569; AVX-NEXT:    vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
570; AVX-NEXT:    retq
571entry:
572  %add = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(
573           <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
574           <2 x double> <double 1.000000e+00, double 1.000000e-01>,
575           metadata !"round.dynamic",
576           metadata !"fpexcept.strict") #0
577  ret <2 x double> %add
578}
579
; Strict fadd of <3 x float> where one lane adds 0.0 (zeroed reg) to a NaN-
; pattern constant: scalarized addss ops that must not be folded away.
580define <3 x float> @constrained_vector_fadd_v3f32() #0 {
581; CHECK-LABEL: constrained_vector_fadd_v3f32:
582; CHECK:       # %bb.0: # %entry
583; CHECK-NEXT:    xorps %xmm1, %xmm1
584; CHECK-NEXT:    movss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
585; CHECK-NEXT:    addss %xmm2, %xmm1
586; CHECK-NEXT:    movss {{.*#+}} xmm0 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
587; CHECK-NEXT:    addss %xmm2, %xmm0
588; CHECK-NEXT:    addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
589; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
590; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
591; CHECK-NEXT:    retq
592;
593; AVX-LABEL: constrained_vector_fadd_v3f32:
594; AVX:       # %bb.0: # %entry
595; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
596; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [NaN,0.0E+0,0.0E+0,0.0E+0]
597; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
598; AVX-NEXT:    vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
599; AVX-NEXT:    vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
600; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
601; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
602; AVX-NEXT:    retq
603entry:
604  %add = call <3 x float> @llvm.experimental.constrained.fadd.v3f32(
605           <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
606                        float 0xFFFFFFFFE0000000>,
607           <3 x float> <float 2.0, float 1.0, float 0.0>,
608           metadata !"round.dynamic",
609           metadata !"fpexcept.strict") #0
610  ret <3 x float> %add
611}
612
; Strict fadd of <3 x double> with DBL_MAX operands: packed addpd for lanes 0-1
; plus a scalar addsd (0.0 + DBL_MAX) for lane 2; x87 return on SSE, ymm on AVX.
613define <3 x double> @constrained_vector_fadd_v3f64() #0 {
614; CHECK-LABEL: constrained_vector_fadd_v3f64:
615; CHECK:       # %bb.0: # %entry
616; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
617; CHECK-NEXT:    addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
618; CHECK-NEXT:    xorpd %xmm1, %xmm1
619; CHECK-NEXT:    addsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
620; CHECK-NEXT:    movsd %xmm1, -{{[0-9]+}}(%rsp)
621; CHECK-NEXT:    movapd %xmm0, %xmm1
622; CHECK-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
623; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
624; CHECK-NEXT:    wait
625; CHECK-NEXT:    retq
626;
627; AVX-LABEL: constrained_vector_fadd_v3f64:
628; AVX:       # %bb.0: # %entry
629; AVX-NEXT:    vxorpd %xmm0, %xmm0, %xmm0
630; AVX-NEXT:    vaddsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
631; AVX-NEXT:    vmovddup {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308]
632; AVX-NEXT:    # xmm1 = mem[0,0]
633; AVX-NEXT:    vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
634; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
635; AVX-NEXT:    retq
636entry:
637  %add = call <3 x double> @llvm.experimental.constrained.fadd.v3f64(
638           <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
639                         double 0x7FEFFFFFFFFFFFFF>,
640           <3 x double> <double 2.0, double 1.0, double 0.0>,
641           metadata !"round.dynamic",
642           metadata !"fpexcept.strict") #0
643  ret <3 x double> %add
644}
645
; Strict fadd of <4 x double> with DBL_MAX operands: two addpd on SSE, one
; 256-bit vaddpd with a vbroadcastsd splat under AVX.
646define <4 x double> @constrained_vector_fadd_v4f64() #0 {
647; CHECK-LABEL: constrained_vector_fadd_v4f64:
648; CHECK:       # %bb.0: # %entry
649; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
650; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [2.0E+0,2.0000000000000001E-1]
651; CHECK-NEXT:    addpd %xmm0, %xmm1
652; CHECK-NEXT:    addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
653; CHECK-NEXT:    retq
654;
655; AVX-LABEL: constrained_vector_fadd_v4f64:
656; AVX:       # %bb.0: # %entry
657; AVX-NEXT:    vbroadcastsd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308]
658; AVX-NEXT:    vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
659; AVX-NEXT:    retq
660entry:
661  %add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64(
662           <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
663                         double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
664           <4 x double> <double 1.000000e+00, double 1.000000e-01,
665                         double 2.000000e+00, double 2.000000e-01>,
666           metadata !"round.dynamic",
667           metadata !"fpexcept.strict") #0
668  ret <4 x double> %add
669}
670
; Strict fsub of <1 x float> with an infinity operand; expected to lower to a
; single scalar subss/vsubss, not constant-folded.
671define <1 x float> @constrained_vector_fsub_v1f32() #0 {
672; CHECK-LABEL: constrained_vector_fsub_v1f32:
673; CHECK:       # %bb.0: # %entry
674; CHECK-NEXT:    movss {{.*#+}} xmm0 = [+Inf,0.0E+0,0.0E+0,0.0E+0]
675; CHECK-NEXT:    subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
676; CHECK-NEXT:    retq
677;
678; AVX-LABEL: constrained_vector_fsub_v1f32:
679; AVX:       # %bb.0: # %entry
680; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [+Inf,0.0E+0,0.0E+0,0.0E+0]
681; AVX-NEXT:    vsubss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
682; AVX-NEXT:    retq
683entry:
684  %sub = call <1 x float> @llvm.experimental.constrained.fsub.v1f32(
685           <1 x float> <float 0x7FF0000000000000>,
686           <1 x float> <float 1.000000e+00>,
687           metadata !"round.dynamic",
688           metadata !"fpexcept.strict") #0
689  ret <1 x float> %sub
690}
691
define <2 x double> @constrained_vector_fsub_v2f64() #0 {
; CHECK-LABEL: constrained_vector_fsub_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
; CHECK-NEXT:    subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fsub_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
; AVX-NEXT:    # xmm0 = mem[0,0]
; AVX-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
; Strict fsub of <2 x double>: -DBL_MAX lanes minus constants, lowered to a
; single 128-bit subpd/vsubpd (AVX materializes the splat via vmovddup).
entry:
  %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(
           <2 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
           <2 x double> <double 1.000000e+00, double 1.000000e-01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <2 x double> %sub
}
713
define <3 x float> @constrained_vector_fsub_v3f32() #0 {
; CHECK-LABEL: constrained_vector_fsub_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    movss {{.*#+}} xmm1 = [NaN,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    movaps %xmm1, %xmm2
; CHECK-NEXT:    subss %xmm0, %xmm2
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT:    subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fsub_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [NaN,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vsubss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
; AVX-NEXT:    vsubss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT:    retq
; Strict fsub on the odd-sized <3 x float>: the vector is scalarized into
; three subss/vsubss ops and then reassembled (unpcklps/movlhps on SSE,
; vinsertps on AVX).
entry:
  %sub = call <3 x float> @llvm.experimental.constrained.fsub.v3f32(
           <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
                        float 0xFFFFFFFFE0000000>,
           <3 x float> <float 2.0, float 1.0, float 0.0>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x float> %sub
}
747
define <3 x double> @constrained_vector_fsub_v3f64() #0 {
; CHECK-LABEL: constrained_vector_fsub_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xorpd %xmm0, %xmm0
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [-1.7976931348623157E+308,0.0E+0]
; CHECK-NEXT:    subsd %xmm0, %xmm1
; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
; CHECK-NEXT:    subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT:    movsd %xmm1, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movapd %xmm0, %xmm1
; CHECK-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fsub_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vxorpd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [-1.7976931348623157E+308,0.0E+0]
; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovddup {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
; AVX-NEXT:    # xmm1 = mem[0,0]
; AVX-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
; Strict fsub of <3 x double>: split into a 128-bit subpd plus a scalar subsd
; for the third lane. SSE returns the third element on the x87 stack (fldl
; followed by wait to keep strict exception semantics); AVX packs all three
; lanes into a ymm via vinsertf128.
entry:
  %sub = call <3 x double> @llvm.experimental.constrained.fsub.v3f64(
           <3 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
                         double 0xFFEFFFFFFFFFFFFF>,
           <3 x double> <double 2.0, double 1.0, double 0.0>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <3 x double> %sub
}
782
define <4 x double> @constrained_vector_fsub_v4f64() #0 {
; CHECK-LABEL: constrained_vector_fsub_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
; CHECK-NEXT:    movapd %xmm0, %xmm1
; CHECK-NEXT:    subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NEXT:    subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fsub_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd {{.*#+}} ymm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308]
; AVX-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    retq
; Strict fsub of <4 x double>: -DBL_MAX lanes minus constants. SSE splits into
; two 128-bit subpd ops; AVX broadcasts the splat and uses one 256-bit vsubpd.
entry:
  %sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(
           <4 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
                         double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
           <4 x double> <double 1.000000e+00, double 1.000000e-01,
                         double 2.000000e+00, double 2.000000e-01>,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") #0
  ret <4 x double> %sub
}
807
define <1 x float> @constrained_vector_sqrt_v1f32() #0 {
; CHECK-LABEL: constrained_vector_sqrt_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    sqrtss %xmm0, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sqrt_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
; Strict sqrt on a single-element float vector; lowers to scalar
; sqrtss/vsqrtss (no libcall needed — sqrt has a direct instruction).
entry:
  %sqrt = call <1 x float> @llvm.experimental.constrained.sqrt.v1f32(
                              <1 x float> <float 42.0>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <1 x float> %sqrt
}
827
define <2 x double> @constrained_vector_sqrt_v2f64() #0 {
; CHECK-LABEL: constrained_vector_sqrt_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sqrtpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sqrt_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vsqrtpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX-NEXT:    retq
; Strict sqrt of <2 x double>: a single packed sqrtpd/vsqrtpd with a folded
; constant-pool memory operand.
entry:
  %sqrt = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
                              <2 x double> <double 42.0, double 42.1>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <2 x double> %sqrt
}
845
define <3 x float> @constrained_vector_sqrt_v3f32() #0 {
; CHECK-LABEL: constrained_vector_sqrt_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    sqrtss %xmm0, %xmm1
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    sqrtss %xmm0, %xmm0
; CHECK-NEXT:    movss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    sqrtss %xmm2, %xmm2
; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sqrt_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    vsqrtss %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vmovss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    vsqrtss %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT:    retq
; Strict sqrt on the odd-sized <3 x float>: scalarized into three
; sqrtss/vsqrtss ops, then the lanes are recombined with shuffles.
entry:
  %sqrt = call <3 x float> @llvm.experimental.constrained.sqrt.v3f32(
                              <3 x float> <float 42.0, float 43.0, float 44.0>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %sqrt
}
877
define <3 x double> @constrained_vector_sqrt_v3f64() #0 {
; CHECK-LABEL: constrained_vector_sqrt_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT:    sqrtsd %xmm0, %xmm1
; CHECK-NEXT:    sqrtpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT:    movsd %xmm1, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movapd %xmm0, %xmm1
; CHECK-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sqrt_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vsqrtpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
; Strict sqrt of <3 x double>: one packed sqrtpd for two lanes plus a scalar
; sqrtsd for the third. SSE returns the third lane via x87 (fldl + wait for
; strict exception ordering); AVX merges everything into a ymm register.
entry:
  %sqrt = call <3 x double> @llvm.experimental.constrained.sqrt.v3f64(
                          <3 x double> <double 42.0, double 42.1, double 42.2>,
                          metadata !"round.dynamic",
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %sqrt
}
905
define <4 x double> @constrained_vector_sqrt_v4f64() #0 {
; CHECK-LABEL: constrained_vector_sqrt_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sqrtpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NEXT:    sqrtpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sqrt_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vsqrtpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
; AVX-NEXT:    retq
; Strict sqrt of <4 x double>: SSE splits into two 128-bit sqrtpd ops, AVX
; uses a single 256-bit vsqrtpd with a folded constant-pool operand.
; NOTE: removed a stray leading space before the "entry:" label so the label
; is flush-left like every other function in this file (labels with leading
; whitespace are valid IR, but the inconsistency was an obvious typo).
entry:
  %sqrt = call <4 x double> @llvm.experimental.constrained.sqrt.v4f64(
                              <4 x double> <double 42.0, double 42.1,
                                            double 42.2, double 42.3>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <4 x double> %sqrt
}
925
define <1 x float> @constrained_vector_pow_v1f32() #0 {
; CHECK-LABEL: constrained_vector_pow_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    movss {{.*#+}} xmm1 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq powf@PLT
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_pow_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq powf@PLT
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
; Strict pow on a single-element float vector: no pow instruction exists, so
; this lowers to a libcall to powf (with stack realignment via push/pop).
entry:
  %pow = call <1 x float> @llvm.experimental.constrained.pow.v1f32(
                             <1 x float> <float 42.0>,
                             <1 x float> <float 3.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <1 x float> %pow
}
956
define <2 x double> @constrained_vector_pow_v2f64() #0 {
; CHECK-LABEL: constrained_vector_pow_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0]
; CHECK-NEXT:    callq pow@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0]
; CHECK-NEXT:    callq pow@PLT
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_pow_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0]
; AVX-NEXT:    callq pow@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0]
; AVX-NEXT:    callq pow@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
; Strict pow of <2 x double>: scalarized into two pow libcalls, spilling the
; first result to the stack and recombining with unpcklpd.
entry:
  %pow = call <2 x double> @llvm.experimental.constrained.pow.v2f64(
                             <2 x double> <double 42.1, double 42.2>,
                             <2 x double> <double 3.0, double 3.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <2 x double> %pow
}
999
define <3 x float> @constrained_vector_pow_v3f32() #0 {
; CHECK-LABEL: constrained_vector_pow_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    movss {{.*#+}} xmm1 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq powf@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    movss {{.*#+}} xmm1 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq powf@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    movss {{.*#+}} xmm1 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq powf@PLT
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_pow_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq powf@PLT
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq powf@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq powf@PLT
; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
; Strict pow of <3 x float>: three powf libcalls with stack spills between
; calls, then the results are shuffled back into one xmm register.
entry:
  %pow = call <3 x float> @llvm.experimental.constrained.pow.v3f32(
                             <3 x float> <float 42.0, float 43.0, float 44.0>,
                             <3 x float> <float 3.0, float 3.0, float 3.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <3 x float> %pow
}
1055
define <3 x double> @constrained_vector_pow_v3f64() #0 {
; CHECK-LABEL: constrained_vector_pow_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0]
; CHECK-NEXT:    callq pow@PLT
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0]
; CHECK-NEXT:    callq pow@PLT
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0]
; CHECK-NEXT:    callq pow@PLT
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_pow_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0]
; AVX-NEXT:    callq pow@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0]
; AVX-NEXT:    callq pow@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0]
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq pow@PLT
; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
; Strict pow of <3 x double>: three pow libcalls. SSE hands back the third
; lane on the x87 stack (fldl + wait); AVX issues vzeroupper before the last
; libcall and rebuilds the ymm result with vinsertf128.
entry:
  %pow = call <3 x double> @llvm.experimental.constrained.pow.v3f64(
                          <3 x double> <double 42.0, double 42.1, double 42.2>,
                          <3 x double> <double 3.0, double 3.0, double 3.0>,
                          metadata !"round.dynamic",
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %pow
}
1114
define <4 x double> @constrained_vector_pow_v4f64() #0 {
; CHECK-LABEL: constrained_vector_pow_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0]
; CHECK-NEXT:    callq pow@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0]
; CHECK-NEXT:    callq pow@PLT
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2399999999999999E+1,0.0E+0]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0]
; CHECK-NEXT:    callq pow@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0]
; CHECK-NEXT:    callq pow@PLT
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_pow_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2399999999999999E+1,0.0E+0]
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0]
; AVX-NEXT:    callq pow@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0]
; AVX-NEXT:    callq pow@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0]
; AVX-NEXT:    callq pow@PLT
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0]
; AVX-NEXT:    callq pow@PLT
; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
; Strict pow of <4 x double>: four pow libcalls, pairing results with
; unpcklpd; AVX then joins the two 128-bit halves with vinsertf128.
entry:
  %pow = call <4 x double> @llvm.experimental.constrained.pow.v4f64(
                             <4 x double> <double 42.1, double 42.2,
                                           double 42.3, double 42.4>,
                             <4 x double> <double 3.0, double 3.0,
                                           double 3.0, double 3.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <4 x double> %pow
}
1182
define <1 x float> @constrained_vector_powi_v1f32() #0 {
; CHECK-LABEL: constrained_vector_powi_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    movl $3, %edi
; CHECK-NEXT:    callq __powisf2@PLT
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_powi_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    movl $3, %edi
; AVX-NEXT:    callq __powisf2@PLT
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
; Strict powi (float base, i32 exponent) on a single-element vector: lowers
; to the compiler-rt libcall __powisf2 with the exponent passed in %edi.
entry:
  %powi = call <1 x float> @llvm.experimental.constrained.powi.v1f32(
                              <1 x float> <float 42.0>,
                              i32 3,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <1 x float> %powi
}
1213
define <2 x double> @constrained_vector_powi_v2f64() #0 {
; CHECK-LABEL: constrained_vector_powi_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT:    movl $3, %edi
; CHECK-NEXT:    callq __powidf2@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    movl $3, %edi
; CHECK-NEXT:    callq __powidf2@PLT
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_powi_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; AVX-NEXT:    movl $3, %edi
; AVX-NEXT:    callq __powidf2@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    movl $3, %edi
; AVX-NEXT:    callq __powidf2@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
; Strict powi of <2 x double>: scalarized into two __powidf2 libcalls, with
; a stack spill between calls and an unpcklpd to rebuild the vector.
entry:
  %powi = call <2 x double> @llvm.experimental.constrained.powi.v2f64(
                              <2 x double> <double 42.1, double 42.2>,
                              i32 3,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <2 x double> %powi
}
1256
define <3 x float> @constrained_vector_powi_v3f32() #0 {
; CHECK-LABEL: constrained_vector_powi_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    movl $3, %edi
; CHECK-NEXT:    callq __powisf2@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    movl $3, %edi
; CHECK-NEXT:    callq __powisf2@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    movl $3, %edi
; CHECK-NEXT:    callq __powisf2@PLT
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_powi_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    movl $3, %edi
; AVX-NEXT:    callq __powisf2@PLT
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    movl $3, %edi
; AVX-NEXT:    callq __powisf2@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    movl $3, %edi
; AVX-NEXT:    callq __powisf2@PLT
; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
; Strict powi of <3 x float>: three __powisf2 libcalls with spills between
; calls, then lanes are recombined with shuffles (unpcklps/vinsertps).
entry:
  %powi = call <3 x float> @llvm.experimental.constrained.powi.v3f32(
                              <3 x float> <float 42.0, float 43.0, float 44.0>,
                              i32 3,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %powi
}
1312
define <3 x double> @constrained_vector_powi_v3f64() #0 {
; CHECK-LABEL: constrained_vector_powi_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    movl $3, %edi
; CHECK-NEXT:    callq __powidf2@PLT
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    movl $3, %edi
; CHECK-NEXT:    callq __powidf2@PLT
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT:    movl $3, %edi
; CHECK-NEXT:    callq __powidf2@PLT
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_powi_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    movl $3, %edi
; AVX-NEXT:    callq __powidf2@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    movl $3, %edi
; AVX-NEXT:    callq __powidf2@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; AVX-NEXT:    movl $3, %edi
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq __powidf2@PLT
; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
; Strict powi of <3 x double>: three __powidf2 libcalls. SSE returns the
; third lane via x87 (fldl + wait); AVX emits vzeroupper before the last
; libcall and rebuilds the ymm result with vinsertf128.
entry:
  %powi = call <3 x double> @llvm.experimental.constrained.powi.v3f64(
                          <3 x double> <double 42.0, double 42.1, double 42.2>,
                          i32 3,
                          metadata !"round.dynamic",
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %powi
}
1371
1372define <4 x double> @constrained_vector_powi_v4f64() #0 {
1373; CHECK-LABEL: constrained_vector_powi_v4f64:
1374; CHECK:       # %bb.0: # %entry
1375; CHECK-NEXT:    subq $40, %rsp
1376; CHECK-NEXT:    .cfi_def_cfa_offset 48
1377; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
1378; CHECK-NEXT:    movl $3, %edi
1379; CHECK-NEXT:    callq __powidf2@PLT
1380; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
1381; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
1382; CHECK-NEXT:    movl $3, %edi
1383; CHECK-NEXT:    callq __powidf2@PLT
1384; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
1385; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
1386; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
1387; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2399999999999999E+1,0.0E+0]
1388; CHECK-NEXT:    movl $3, %edi
1389; CHECK-NEXT:    callq __powidf2@PLT
1390; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1391; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
1392; CHECK-NEXT:    movl $3, %edi
1393; CHECK-NEXT:    callq __powidf2@PLT
1394; CHECK-NEXT:    movaps %xmm0, %xmm1
1395; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
1396; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
1397; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
1398; CHECK-NEXT:    addq $40, %rsp
1399; CHECK-NEXT:    .cfi_def_cfa_offset 8
1400; CHECK-NEXT:    retq
1401;
1402; AVX-LABEL: constrained_vector_powi_v4f64:
1403; AVX:       # %bb.0: # %entry
1404; AVX-NEXT:    subq $40, %rsp
1405; AVX-NEXT:    .cfi_def_cfa_offset 48
1406; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2399999999999999E+1,0.0E+0]
1407; AVX-NEXT:    movl $3, %edi
1408; AVX-NEXT:    callq __powidf2@PLT
1409; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
1410; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
1411; AVX-NEXT:    movl $3, %edi
1412; AVX-NEXT:    callq __powidf2@PLT
1413; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
1414; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
1415; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
1416; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
1417; AVX-NEXT:    movl $3, %edi
1418; AVX-NEXT:    callq __powidf2@PLT
1419; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1420; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
1421; AVX-NEXT:    movl $3, %edi
1422; AVX-NEXT:    callq __powidf2@PLT
1423; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
1424; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
1425; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
1426; AVX-NEXT:    addq $40, %rsp
1427; AVX-NEXT:    .cfi_def_cfa_offset 8
1428; AVX-NEXT:    retq
; Strict powi on <4 x double>: expected lowering is four scalar __powidf2 libcalls
; (exponent 3 passed in %edi), with lanes rejoined via unpcklpd (SSE) or
; vunpcklpd + vinsertf128 into a ymm register (AVX).
; NOTE(review): the CHECK/AVX lines above are autogenerated by
; update_llc_test_checks.py -- regenerate them rather than editing by hand.
1429entry:
1430  %powi = call <4 x double> @llvm.experimental.constrained.powi.v4f64(
1431                              <4 x double> <double 42.1, double 42.2,
1432                                            double 42.3, double 42.4>,
1433                              i32 3,
1434                              metadata !"round.dynamic",
1435                              metadata !"fpexcept.strict") #0
1436  ret <4 x double> %powi
1437}
1438
1439define <1 x float> @constrained_vector_sin_v1f32() #0 {
1440; CHECK-LABEL: constrained_vector_sin_v1f32:
1441; CHECK:       # %bb.0: # %entry
1442; CHECK-NEXT:    pushq %rax
1443; CHECK-NEXT:    .cfi_def_cfa_offset 16
1444; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
1445; CHECK-NEXT:    callq sinf@PLT
1446; CHECK-NEXT:    popq %rax
1447; CHECK-NEXT:    .cfi_def_cfa_offset 8
1448; CHECK-NEXT:    retq
1449;
1450; AVX-LABEL: constrained_vector_sin_v1f32:
1451; AVX:       # %bb.0: # %entry
1452; AVX-NEXT:    pushq %rax
1453; AVX-NEXT:    .cfi_def_cfa_offset 16
1454; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
1455; AVX-NEXT:    callq sinf@PLT
1456; AVX-NEXT:    popq %rax
1457; AVX-NEXT:    .cfi_def_cfa_offset 8
1458; AVX-NEXT:    retq
; Strict sin on <1 x float>: lowered to a single sinf libcall (checks are
; autogenerated by update_llc_test_checks.py; regenerate, don't hand-edit).
1459entry:
1460  %sin = call <1 x float> @llvm.experimental.constrained.sin.v1f32(
1461                             <1 x float> <float 42.0>,
1462                             metadata !"round.dynamic",
1463                             metadata !"fpexcept.strict") #0
1464  ret <1 x float> %sin
1465}
1466
1467define <2 x double> @constrained_vector_sin_v2f64() #0 {
1468; CHECK-LABEL: constrained_vector_sin_v2f64:
1469; CHECK:       # %bb.0: # %entry
1470; CHECK-NEXT:    subq $24, %rsp
1471; CHECK-NEXT:    .cfi_def_cfa_offset 32
1472; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
1473; CHECK-NEXT:    callq sin@PLT
1474; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
1475; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
1476; CHECK-NEXT:    callq sin@PLT
1477; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
1478; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
1479; CHECK-NEXT:    addq $24, %rsp
1480; CHECK-NEXT:    .cfi_def_cfa_offset 8
1481; CHECK-NEXT:    retq
1482;
1483; AVX-LABEL: constrained_vector_sin_v2f64:
1484; AVX:       # %bb.0: # %entry
1485; AVX-NEXT:    subq $24, %rsp
1486; AVX-NEXT:    .cfi_def_cfa_offset 32
1487; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
1488; AVX-NEXT:    callq sin@PLT
1489; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
1490; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
1491; AVX-NEXT:    callq sin@PLT
1492; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
1493; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
1494; AVX-NEXT:    addq $24, %rsp
1495; AVX-NEXT:    .cfi_def_cfa_offset 8
1496; AVX-NEXT:    retq
; Strict sin on <2 x double>: scalarized into two sin libcalls whose results
; are merged with (v)unpcklpd. Checks autogenerated; regenerate if they change.
1497entry:
1498  %sin = call <2 x double> @llvm.experimental.constrained.sin.v2f64(
1499                             <2 x double> <double 42.0, double 42.1>,
1500                             metadata !"round.dynamic",
1501                             metadata !"fpexcept.strict") #0
1502  ret <2 x double> %sin
1503}
1504
1505define <3 x float> @constrained_vector_sin_v3f32() #0 {
1506; CHECK-LABEL: constrained_vector_sin_v3f32:
1507; CHECK:       # %bb.0: # %entry
1508; CHECK-NEXT:    subq $40, %rsp
1509; CHECK-NEXT:    .cfi_def_cfa_offset 48
1510; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
1511; CHECK-NEXT:    callq sinf@PLT
1512; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1513; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
1514; CHECK-NEXT:    callq sinf@PLT
1515; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
1516; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
1517; CHECK-NEXT:    callq sinf@PLT
1518; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
1519; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1520; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
1521; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
1522; CHECK-NEXT:    movaps %xmm1, %xmm0
1523; CHECK-NEXT:    addq $40, %rsp
1524; CHECK-NEXT:    .cfi_def_cfa_offset 8
1525; CHECK-NEXT:    retq
1526;
1527; AVX-LABEL: constrained_vector_sin_v3f32:
1528; AVX:       # %bb.0: # %entry
1529; AVX-NEXT:    subq $40, %rsp
1530; AVX-NEXT:    .cfi_def_cfa_offset 48
1531; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
1532; AVX-NEXT:    callq sinf@PLT
1533; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1534; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
1535; AVX-NEXT:    callq sinf@PLT
1536; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
1537; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
1538; AVX-NEXT:    callq sinf@PLT
1539; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
1540; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1541; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
1542; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
1543; AVX-NEXT:    addq $40, %rsp
1544; AVX-NEXT:    .cfi_def_cfa_offset 8
1545; AVX-NEXT:    retq
; Strict sin on <3 x float>: three sinf libcalls; lanes reassembled with
; unpcklps/unpcklpd (SSE) or vinsertps (AVX). Checks are autogenerated.
1546entry:
1547  %sin = call <3 x float> @llvm.experimental.constrained.sin.v3f32(
1548                              <3 x float> <float 42.0, float 43.0, float 44.0>,
1549                              metadata !"round.dynamic",
1550                              metadata !"fpexcept.strict") #0
1551  ret <3 x float> %sin
1552}
1553
1554define <3 x double> @constrained_vector_sin_v3f64() #0 {
1555; CHECK-LABEL: constrained_vector_sin_v3f64:
1556; CHECK:       # %bb.0: # %entry
1557; CHECK-NEXT:    subq $24, %rsp
1558; CHECK-NEXT:    .cfi_def_cfa_offset 32
1559; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
1560; CHECK-NEXT:    callq sin@PLT
1561; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1562; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
1563; CHECK-NEXT:    callq sin@PLT
1564; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
1565; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
1566; CHECK-NEXT:    callq sin@PLT
1567; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
1568; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
1569; CHECK-NEXT:    wait
1570; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
1571; CHECK-NEXT:    # xmm0 = mem[0],zero
1572; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
1573; CHECK-NEXT:    # xmm1 = mem[0],zero
1574; CHECK-NEXT:    addq $24, %rsp
1575; CHECK-NEXT:    .cfi_def_cfa_offset 8
1576; CHECK-NEXT:    retq
1577;
1578; AVX-LABEL: constrained_vector_sin_v3f64:
1579; AVX:       # %bb.0: # %entry
1580; AVX-NEXT:    subq $40, %rsp
1581; AVX-NEXT:    .cfi_def_cfa_offset 48
1582; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
1583; AVX-NEXT:    callq sin@PLT
1584; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
1585; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
1586; AVX-NEXT:    callq sin@PLT
1587; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
1588; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
1589; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
1590; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
1591; AVX-NEXT:    vzeroupper
1592; AVX-NEXT:    callq sin@PLT
1593; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
1594; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1595; AVX-NEXT:    addq $40, %rsp
1596; AVX-NEXT:    .cfi_def_cfa_offset 8
1597; AVX-NEXT:    retq
; Strict sin on <3 x double>: three sin libcalls. In the SSE lowering the
; third lane comes back via the x87 stack (fldl + wait); AVX builds a ymm
; with vunpcklpd + vinsertf128 and emits vzeroupper before the SSE libcall.
; Checks are autogenerated by update_llc_test_checks.py.
1598entry:
1599  %sin = call <3 x double> @llvm.experimental.constrained.sin.v3f64(
1600                          <3 x double> <double 42.0, double 42.1, double 42.2>,
1601                          metadata !"round.dynamic",
1602                          metadata !"fpexcept.strict") #0
1603  ret <3 x double> %sin
1604}
1605
1606define <4 x double> @constrained_vector_sin_v4f64() #0 {
1607; CHECK-LABEL: constrained_vector_sin_v4f64:
1608; CHECK:       # %bb.0: # %entry
1609; CHECK-NEXT:    subq $40, %rsp
1610; CHECK-NEXT:    .cfi_def_cfa_offset 48
1611; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
1612; CHECK-NEXT:    callq sin@PLT
1613; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
1614; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
1615; CHECK-NEXT:    callq sin@PLT
1616; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
1617; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
1618; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
1619; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
1620; CHECK-NEXT:    callq sin@PLT
1621; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1622; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
1623; CHECK-NEXT:    callq sin@PLT
1624; CHECK-NEXT:    movaps %xmm0, %xmm1
1625; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
1626; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
1627; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
1628; CHECK-NEXT:    addq $40, %rsp
1629; CHECK-NEXT:    .cfi_def_cfa_offset 8
1630; CHECK-NEXT:    retq
1631;
1632; AVX-LABEL: constrained_vector_sin_v4f64:
1633; AVX:       # %bb.0: # %entry
1634; AVX-NEXT:    subq $40, %rsp
1635; AVX-NEXT:    .cfi_def_cfa_offset 48
1636; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
1637; AVX-NEXT:    callq sin@PLT
1638; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
1639; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
1640; AVX-NEXT:    callq sin@PLT
1641; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
1642; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
1643; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
1644; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
1645; AVX-NEXT:    callq sin@PLT
1646; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1647; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
1648; AVX-NEXT:    callq sin@PLT
1649; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
1650; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
1651; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
1652; AVX-NEXT:    addq $40, %rsp
1653; AVX-NEXT:    .cfi_def_cfa_offset 8
1654; AVX-NEXT:    retq
; Strict sin on <4 x double>: four sin libcalls, merged pairwise with
; (v)unpcklpd and, for AVX, combined into a ymm via vinsertf128.
; Checks are autogenerated by update_llc_test_checks.py.
1655entry:
1656  %sin = call <4 x double> @llvm.experimental.constrained.sin.v4f64(
1657                             <4 x double> <double 42.0, double 42.1,
1658                                           double 42.2, double 42.3>,
1659                             metadata !"round.dynamic",
1660                             metadata !"fpexcept.strict") #0
1661  ret <4 x double> %sin
1662}
1663
1664define <1 x float> @constrained_vector_cos_v1f32() #0 {
1665; CHECK-LABEL: constrained_vector_cos_v1f32:
1666; CHECK:       # %bb.0: # %entry
1667; CHECK-NEXT:    pushq %rax
1668; CHECK-NEXT:    .cfi_def_cfa_offset 16
1669; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
1670; CHECK-NEXT:    callq cosf@PLT
1671; CHECK-NEXT:    popq %rax
1672; CHECK-NEXT:    .cfi_def_cfa_offset 8
1673; CHECK-NEXT:    retq
1674;
1675; AVX-LABEL: constrained_vector_cos_v1f32:
1676; AVX:       # %bb.0: # %entry
1677; AVX-NEXT:    pushq %rax
1678; AVX-NEXT:    .cfi_def_cfa_offset 16
1679; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
1680; AVX-NEXT:    callq cosf@PLT
1681; AVX-NEXT:    popq %rax
1682; AVX-NEXT:    .cfi_def_cfa_offset 8
1683; AVX-NEXT:    retq
; Strict cos on <1 x float>: lowered to a single cosf libcall (checks are
; autogenerated by update_llc_test_checks.py; regenerate, don't hand-edit).
1684entry:
1685  %cos = call <1 x float> @llvm.experimental.constrained.cos.v1f32(
1686                             <1 x float> <float 42.0>,
1687                             metadata !"round.dynamic",
1688                             metadata !"fpexcept.strict") #0
1689  ret <1 x float> %cos
1690}
1691
1692define <2 x double> @constrained_vector_cos_v2f64() #0 {
1693; CHECK-LABEL: constrained_vector_cos_v2f64:
1694; CHECK:       # %bb.0: # %entry
1695; CHECK-NEXT:    subq $24, %rsp
1696; CHECK-NEXT:    .cfi_def_cfa_offset 32
1697; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
1698; CHECK-NEXT:    callq cos@PLT
1699; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
1700; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
1701; CHECK-NEXT:    callq cos@PLT
1702; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
1703; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
1704; CHECK-NEXT:    addq $24, %rsp
1705; CHECK-NEXT:    .cfi_def_cfa_offset 8
1706; CHECK-NEXT:    retq
1707;
1708; AVX-LABEL: constrained_vector_cos_v2f64:
1709; AVX:       # %bb.0: # %entry
1710; AVX-NEXT:    subq $24, %rsp
1711; AVX-NEXT:    .cfi_def_cfa_offset 32
1712; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
1713; AVX-NEXT:    callq cos@PLT
1714; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
1715; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
1716; AVX-NEXT:    callq cos@PLT
1717; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
1718; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
1719; AVX-NEXT:    addq $24, %rsp
1720; AVX-NEXT:    .cfi_def_cfa_offset 8
1721; AVX-NEXT:    retq
; Strict cos on <2 x double>: scalarized into two cos libcalls whose results
; are merged with (v)unpcklpd. Checks autogenerated; regenerate if they change.
1722entry:
1723  %cos = call <2 x double> @llvm.experimental.constrained.cos.v2f64(
1724                             <2 x double> <double 42.0, double 42.1>,
1725                             metadata !"round.dynamic",
1726                             metadata !"fpexcept.strict") #0
1727  ret <2 x double> %cos
1728}
1729
1730define <3 x float> @constrained_vector_cos_v3f32() #0 {
1731; CHECK-LABEL: constrained_vector_cos_v3f32:
1732; CHECK:       # %bb.0: # %entry
1733; CHECK-NEXT:    subq $40, %rsp
1734; CHECK-NEXT:    .cfi_def_cfa_offset 48
1735; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
1736; CHECK-NEXT:    callq cosf@PLT
1737; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1738; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
1739; CHECK-NEXT:    callq cosf@PLT
1740; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
1741; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
1742; CHECK-NEXT:    callq cosf@PLT
1743; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
1744; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1745; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
1746; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
1747; CHECK-NEXT:    movaps %xmm1, %xmm0
1748; CHECK-NEXT:    addq $40, %rsp
1749; CHECK-NEXT:    .cfi_def_cfa_offset 8
1750; CHECK-NEXT:    retq
1751;
1752; AVX-LABEL: constrained_vector_cos_v3f32:
1753; AVX:       # %bb.0: # %entry
1754; AVX-NEXT:    subq $40, %rsp
1755; AVX-NEXT:    .cfi_def_cfa_offset 48
1756; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
1757; AVX-NEXT:    callq cosf@PLT
1758; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1759; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
1760; AVX-NEXT:    callq cosf@PLT
1761; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
1762; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
1763; AVX-NEXT:    callq cosf@PLT
1764; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
1765; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1766; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
1767; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
1768; AVX-NEXT:    addq $40, %rsp
1769; AVX-NEXT:    .cfi_def_cfa_offset 8
1770; AVX-NEXT:    retq
; Strict cos on <3 x float>: three cosf libcalls; lanes reassembled with
; unpcklps/unpcklpd (SSE) or vinsertps (AVX). Checks are autogenerated.
1771entry:
1772  %cos = call <3 x float> @llvm.experimental.constrained.cos.v3f32(
1773                              <3 x float> <float 42.0, float 43.0, float 44.0>,
1774                              metadata !"round.dynamic",
1775                              metadata !"fpexcept.strict") #0
1776  ret <3 x float> %cos
1777}
1778
1779define <3 x double> @constrained_vector_cos_v3f64() #0 {
1780; CHECK-LABEL: constrained_vector_cos_v3f64:
1781; CHECK:       # %bb.0: # %entry
1782; CHECK-NEXT:    subq $24, %rsp
1783; CHECK-NEXT:    .cfi_def_cfa_offset 32
1784; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
1785; CHECK-NEXT:    callq cos@PLT
1786; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1787; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
1788; CHECK-NEXT:    callq cos@PLT
1789; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
1790; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
1791; CHECK-NEXT:    callq cos@PLT
1792; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
1793; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
1794; CHECK-NEXT:    wait
1795; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
1796; CHECK-NEXT:    # xmm0 = mem[0],zero
1797; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
1798; CHECK-NEXT:    # xmm1 = mem[0],zero
1799; CHECK-NEXT:    addq $24, %rsp
1800; CHECK-NEXT:    .cfi_def_cfa_offset 8
1801; CHECK-NEXT:    retq
1802;
1803; AVX-LABEL: constrained_vector_cos_v3f64:
1804; AVX:       # %bb.0: # %entry
1805; AVX-NEXT:    subq $40, %rsp
1806; AVX-NEXT:    .cfi_def_cfa_offset 48
1807; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
1808; AVX-NEXT:    callq cos@PLT
1809; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
1810; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
1811; AVX-NEXT:    callq cos@PLT
1812; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
1813; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
1814; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
1815; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
1816; AVX-NEXT:    vzeroupper
1817; AVX-NEXT:    callq cos@PLT
1818; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
1819; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1820; AVX-NEXT:    addq $40, %rsp
1821; AVX-NEXT:    .cfi_def_cfa_offset 8
1822; AVX-NEXT:    retq
; Strict cos on <3 x double>: three cos libcalls; SSE returns the third lane
; through the x87 stack (fldl + wait), AVX assembles a ymm with vinsertf128
; and emits vzeroupper before the last libcall. Checks are autogenerated.
1823entry:
1824  %cos = call <3 x double> @llvm.experimental.constrained.cos.v3f64(
1825                          <3 x double> <double 42.0, double 42.1, double 42.2>,
1826                          metadata !"round.dynamic",
1827                          metadata !"fpexcept.strict") #0
1828  ret <3 x double> %cos
1829}
1830
1831define <4 x double> @constrained_vector_cos_v4f64() #0 {
1832; CHECK-LABEL: constrained_vector_cos_v4f64:
1833; CHECK:       # %bb.0: # %entry
1834; CHECK-NEXT:    subq $40, %rsp
1835; CHECK-NEXT:    .cfi_def_cfa_offset 48
1836; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
1837; CHECK-NEXT:    callq cos@PLT
1838; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
1839; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
1840; CHECK-NEXT:    callq cos@PLT
1841; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
1842; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
1843; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
1844; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
1845; CHECK-NEXT:    callq cos@PLT
1846; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1847; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
1848; CHECK-NEXT:    callq cos@PLT
1849; CHECK-NEXT:    movaps %xmm0, %xmm1
1850; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
1851; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
1852; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
1853; CHECK-NEXT:    addq $40, %rsp
1854; CHECK-NEXT:    .cfi_def_cfa_offset 8
1855; CHECK-NEXT:    retq
1856;
1857; AVX-LABEL: constrained_vector_cos_v4f64:
1858; AVX:       # %bb.0: # %entry
1859; AVX-NEXT:    subq $40, %rsp
1860; AVX-NEXT:    .cfi_def_cfa_offset 48
1861; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
1862; AVX-NEXT:    callq cos@PLT
1863; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
1864; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
1865; AVX-NEXT:    callq cos@PLT
1866; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
1867; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
1868; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
1869; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
1870; AVX-NEXT:    callq cos@PLT
1871; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1872; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
1873; AVX-NEXT:    callq cos@PLT
1874; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
1875; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
1876; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
1877; AVX-NEXT:    addq $40, %rsp
1878; AVX-NEXT:    .cfi_def_cfa_offset 8
1879; AVX-NEXT:    retq
; Strict cos on <4 x double>: four cos libcalls, merged pairwise with
; (v)unpcklpd and, for AVX, combined into a ymm via vinsertf128.
; Checks are autogenerated by update_llc_test_checks.py.
1880entry:
1881  %cos = call <4 x double> @llvm.experimental.constrained.cos.v4f64(
1882                             <4 x double> <double 42.0, double 42.1,
1883                                           double 42.2, double 42.3>,
1884                             metadata !"round.dynamic",
1885                             metadata !"fpexcept.strict") #0
1886  ret <4 x double> %cos
1887}
1888
1889define <1 x float> @constrained_vector_exp_v1f32() #0 {
1890; CHECK-LABEL: constrained_vector_exp_v1f32:
1891; CHECK:       # %bb.0: # %entry
1892; CHECK-NEXT:    pushq %rax
1893; CHECK-NEXT:    .cfi_def_cfa_offset 16
1894; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
1895; CHECK-NEXT:    callq expf@PLT
1896; CHECK-NEXT:    popq %rax
1897; CHECK-NEXT:    .cfi_def_cfa_offset 8
1898; CHECK-NEXT:    retq
1899;
1900; AVX-LABEL: constrained_vector_exp_v1f32:
1901; AVX:       # %bb.0: # %entry
1902; AVX-NEXT:    pushq %rax
1903; AVX-NEXT:    .cfi_def_cfa_offset 16
1904; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
1905; AVX-NEXT:    callq expf@PLT
1906; AVX-NEXT:    popq %rax
1907; AVX-NEXT:    .cfi_def_cfa_offset 8
1908; AVX-NEXT:    retq
; Strict exp on <1 x float>: lowered to a single expf libcall (checks are
; autogenerated by update_llc_test_checks.py; regenerate, don't hand-edit).
1909entry:
1910  %exp = call <1 x float> @llvm.experimental.constrained.exp.v1f32(
1911                             <1 x float> <float 42.0>,
1912                             metadata !"round.dynamic",
1913                             metadata !"fpexcept.strict") #0
1914  ret <1 x float> %exp
1915}
1916
1917define <2 x double> @constrained_vector_exp_v2f64() #0 {
1918; CHECK-LABEL: constrained_vector_exp_v2f64:
1919; CHECK:       # %bb.0: # %entry
1920; CHECK-NEXT:    subq $24, %rsp
1921; CHECK-NEXT:    .cfi_def_cfa_offset 32
1922; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
1923; CHECK-NEXT:    callq exp@PLT
1924; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
1925; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
1926; CHECK-NEXT:    callq exp@PLT
1927; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
1928; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
1929; CHECK-NEXT:    addq $24, %rsp
1930; CHECK-NEXT:    .cfi_def_cfa_offset 8
1931; CHECK-NEXT:    retq
1932;
1933; AVX-LABEL: constrained_vector_exp_v2f64:
1934; AVX:       # %bb.0: # %entry
1935; AVX-NEXT:    subq $24, %rsp
1936; AVX-NEXT:    .cfi_def_cfa_offset 32
1937; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
1938; AVX-NEXT:    callq exp@PLT
1939; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
1940; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
1941; AVX-NEXT:    callq exp@PLT
1942; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
1943; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
1944; AVX-NEXT:    addq $24, %rsp
1945; AVX-NEXT:    .cfi_def_cfa_offset 8
1946; AVX-NEXT:    retq
; Strict exp on <2 x double>: scalarized into two exp libcalls whose results
; are merged with (v)unpcklpd. Checks autogenerated; regenerate if they change.
1947entry:
1948  %exp = call <2 x double> @llvm.experimental.constrained.exp.v2f64(
1949                             <2 x double> <double 42.0, double 42.1>,
1950                             metadata !"round.dynamic",
1951                             metadata !"fpexcept.strict") #0
1952  ret <2 x double> %exp
1953}
1954
1955define <3 x float> @constrained_vector_exp_v3f32() #0 {
1956; CHECK-LABEL: constrained_vector_exp_v3f32:
1957; CHECK:       # %bb.0: # %entry
1958; CHECK-NEXT:    subq $40, %rsp
1959; CHECK-NEXT:    .cfi_def_cfa_offset 48
1960; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
1961; CHECK-NEXT:    callq expf@PLT
1962; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1963; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
1964; CHECK-NEXT:    callq expf@PLT
1965; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
1966; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
1967; CHECK-NEXT:    callq expf@PLT
1968; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
1969; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1970; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
1971; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
1972; CHECK-NEXT:    movaps %xmm1, %xmm0
1973; CHECK-NEXT:    addq $40, %rsp
1974; CHECK-NEXT:    .cfi_def_cfa_offset 8
1975; CHECK-NEXT:    retq
1976;
1977; AVX-LABEL: constrained_vector_exp_v3f32:
1978; AVX:       # %bb.0: # %entry
1979; AVX-NEXT:    subq $40, %rsp
1980; AVX-NEXT:    .cfi_def_cfa_offset 48
1981; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
1982; AVX-NEXT:    callq expf@PLT
1983; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1984; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
1985; AVX-NEXT:    callq expf@PLT
1986; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
1987; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
1988; AVX-NEXT:    callq expf@PLT
1989; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
1990; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1991; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
1992; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
1993; AVX-NEXT:    addq $40, %rsp
1994; AVX-NEXT:    .cfi_def_cfa_offset 8
1995; AVX-NEXT:    retq
; Strict exp on <3 x float>: three expf libcalls; lanes reassembled with
; unpcklps/unpcklpd (SSE) or vinsertps (AVX). Checks are autogenerated.
1996entry:
1997  %exp = call <3 x float> @llvm.experimental.constrained.exp.v3f32(
1998                              <3 x float> <float 42.0, float 43.0, float 44.0>,
1999                              metadata !"round.dynamic",
2000                              metadata !"fpexcept.strict") #0
2001  ret <3 x float> %exp
2002}
2003
2004define <3 x double> @constrained_vector_exp_v3f64() #0 {
2005; CHECK-LABEL: constrained_vector_exp_v3f64:
2006; CHECK:       # %bb.0: # %entry
2007; CHECK-NEXT:    subq $24, %rsp
2008; CHECK-NEXT:    .cfi_def_cfa_offset 32
2009; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
2010; CHECK-NEXT:    callq exp@PLT
2011; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2012; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
2013; CHECK-NEXT:    callq exp@PLT
2014; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
2015; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
2016; CHECK-NEXT:    callq exp@PLT
2017; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
2018; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
2019; CHECK-NEXT:    wait
2020; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
2021; CHECK-NEXT:    # xmm0 = mem[0],zero
2022; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
2023; CHECK-NEXT:    # xmm1 = mem[0],zero
2024; CHECK-NEXT:    addq $24, %rsp
2025; CHECK-NEXT:    .cfi_def_cfa_offset 8
2026; CHECK-NEXT:    retq
2027;
2028; AVX-LABEL: constrained_vector_exp_v3f64:
2029; AVX:       # %bb.0: # %entry
2030; AVX-NEXT:    subq $40, %rsp
2031; AVX-NEXT:    .cfi_def_cfa_offset 48
2032; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
2033; AVX-NEXT:    callq exp@PLT
2034; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
2035; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
2036; AVX-NEXT:    callq exp@PLT
2037; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2038; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
2039; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
2040; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
2041; AVX-NEXT:    vzeroupper
2042; AVX-NEXT:    callq exp@PLT
2043; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
2044; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
2045; AVX-NEXT:    addq $40, %rsp
2046; AVX-NEXT:    .cfi_def_cfa_offset 8
2047; AVX-NEXT:    retq
; Strict exp on <3 x double>: three exp libcalls; SSE returns the third lane
; through the x87 stack (fldl + wait), AVX assembles a ymm with vinsertf128
; and emits vzeroupper before the last libcall. Checks are autogenerated.
2048entry:
2049  %exp = call <3 x double> @llvm.experimental.constrained.exp.v3f64(
2050                          <3 x double> <double 42.0, double 42.1, double 42.2>,
2051                          metadata !"round.dynamic",
2052                          metadata !"fpexcept.strict") #0
2053  ret <3 x double> %exp
2054}
2055
; Strict-FP exp on a <4 x double> constant: the CHECK lines verify the call is
; not constant-folded and instead expands to four scalar exp libcalls whose
; results are repacked (unpcklpd into xmm0/xmm1 for SSE; vinsertf128 into ymm0
; for AVX).
define <4 x double> @constrained_vector_exp_v4f64() #0 {
; CHECK-LABEL: constrained_vector_exp_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    callq exp@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    callq exp@PLT
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
; CHECK-NEXT:    callq exp@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT:    callq exp@PLT
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_exp_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
; AVX-NEXT:    callq exp@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; AVX-NEXT:    callq exp@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    callq exp@PLT
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    callq exp@PLT
; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %exp = call <4 x double> @llvm.experimental.constrained.exp.v4f64(
                             <4 x double> <double 42.0, double 42.1,
                                           double 42.2, double 42.3>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <4 x double> %exp
}
2113
; Strict-FP exp2 on a <1 x float> constant: must survive as a single exp2f
; libcall (not constant-folded) in both the SSE and AVX pipelines.
define <1 x float> @constrained_vector_exp2_v1f32() #0 {
; CHECK-LABEL: constrained_vector_exp2_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq exp2f@PLT
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_exp2_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq exp2f@PLT
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %exp2 = call <1 x float> @llvm.experimental.constrained.exp2.v1f32(
                             <1 x float> <float 42.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <1 x float> %exp2
}
2141
; Strict-FP exp2 on a <2 x double> constant: expands to two scalar exp2
; libcalls recombined with (v)unpcklpd. Note the input is deliberately
; <42.1, 42.0> (reversed relative to the other v2f64 tests in this file).
define <2 x double> @constrained_vector_exp2_v2f64() #0 {
; CHECK-LABEL: constrained_vector_exp2_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    callq exp2@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    callq exp2@PLT
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_exp2_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    callq exp2@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    callq exp2@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %exp2 = call <2 x double> @llvm.experimental.constrained.exp2.v2f64(
                              <2 x double> <double 42.1, double 42.0>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <2 x double> %exp2
}
2179
; Strict-FP exp2 on a <3 x float> (odd-width) constant: expands to three
; scalar exp2f libcalls, with the lanes rebuilt via unpcklps/unpcklpd (SSE)
; or vinsertps (AVX).
define <3 x float> @constrained_vector_exp2_v3f32() #0 {
; CHECK-LABEL: constrained_vector_exp2_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq exp2f@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq exp2f@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq exp2f@PLT
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_exp2_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq exp2f@PLT
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq exp2f@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq exp2f@PLT
; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %exp2 = call <3 x float> @llvm.experimental.constrained.exp2.v3f32(
                              <3 x float> <float 42.0, float 43.0, float 44.0>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %exp2
}
2228
; Strict-FP exp2 on a <3 x double> constant: three scalar exp2 libcalls. In
; the SSE path the third element is returned on the x87 stack (fldl, with a
; trailing `wait` for strict exception semantics); the AVX path packs all
; three into ymm0 via vinsertf128.
define <3 x double> @constrained_vector_exp2_v3f64() #0 {
; CHECK-LABEL: constrained_vector_exp2_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    callq exp2@PLT
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    callq exp2@PLT
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT:    callq exp2@PLT
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_exp2_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    callq exp2@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    callq exp2@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq exp2@PLT
; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %exp2 = call <3 x double> @llvm.experimental.constrained.exp2.v3f64(
                          <3 x double> <double 42.0, double 42.1, double 42.2>,
                          metadata !"round.dynamic",
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %exp2
}
2280
; Strict-FP exp2 on a <4 x double> constant: four scalar exp2 libcalls,
; repacked into xmm0/xmm1 (SSE) or ymm0 via vinsertf128 (AVX).
define <4 x double> @constrained_vector_exp2_v4f64() #0 {
; CHECK-LABEL: constrained_vector_exp2_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT:    callq exp2@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    callq exp2@PLT
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2399999999999999E+1,0.0E+0]
; CHECK-NEXT:    callq exp2@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
; CHECK-NEXT:    callq exp2@PLT
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_exp2_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2399999999999999E+1,0.0E+0]
; AVX-NEXT:    callq exp2@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
; AVX-NEXT:    callq exp2@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; AVX-NEXT:    callq exp2@PLT
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    callq exp2@PLT
; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %exp2 = call <4 x double> @llvm.experimental.constrained.exp2.v4f64(
                              <4 x double> <double 42.1, double 42.2,
                                            double 42.3, double 42.4>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <4 x double> %exp2
}
2338
; Strict-FP log on a <1 x float> constant: must survive as a single logf
; libcall (not constant-folded) in both the SSE and AVX pipelines.
define <1 x float> @constrained_vector_log_v1f32() #0 {
; CHECK-LABEL: constrained_vector_log_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq logf@PLT
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_log_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq logf@PLT
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %log = call <1 x float> @llvm.experimental.constrained.log.v1f32(
                             <1 x float> <float 42.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <1 x float> %log
}
2366
; Strict-FP log on a <2 x double> constant: expands to two scalar log
; libcalls recombined with (v)unpcklpd.
define <2 x double> @constrained_vector_log_v2f64() #0 {
; CHECK-LABEL: constrained_vector_log_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    callq log@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    callq log@PLT
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_log_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    callq log@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    callq log@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %log = call <2 x double> @llvm.experimental.constrained.log.v2f64(
                             <2 x double> <double 42.0, double 42.1>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <2 x double> %log
}
2404
; Strict-FP log on a <3 x float> (odd-width) constant: three scalar logf
; libcalls, lanes rebuilt via unpcklps/unpcklpd (SSE) or vinsertps (AVX).
define <3 x float> @constrained_vector_log_v3f32() #0 {
; CHECK-LABEL: constrained_vector_log_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq logf@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq logf@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq logf@PLT
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_log_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq logf@PLT
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq logf@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq logf@PLT
; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %log = call <3 x float> @llvm.experimental.constrained.log.v3f32(
                              <3 x float> <float 42.0, float 43.0, float 44.0>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %log
}
2453
; Strict-FP log on a <3 x double> constant: three scalar log libcalls. The
; SSE path returns the third element on the x87 stack (fldl + `wait`); the
; AVX path packs all three into ymm0 via vinsertf128.
define <3 x double> @constrained_vector_log_v3f64() #0 {
; CHECK-LABEL: constrained_vector_log_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    callq log@PLT
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    callq log@PLT
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT:    callq log@PLT
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_log_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    callq log@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    callq log@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq log@PLT
; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %log = call <3 x double> @llvm.experimental.constrained.log.v3f64(
                          <3 x double> <double 42.0, double 42.1, double 42.2>,
                          metadata !"round.dynamic",
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %log
}
2505
; Strict-FP log on a <4 x double> constant: four scalar log libcalls,
; repacked into xmm0/xmm1 (SSE) or ymm0 via vinsertf128 (AVX).
define <4 x double> @constrained_vector_log_v4f64() #0 {
; CHECK-LABEL: constrained_vector_log_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    callq log@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    callq log@PLT
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
; CHECK-NEXT:    callq log@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT:    callq log@PLT
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_log_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
; AVX-NEXT:    callq log@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; AVX-NEXT:    callq log@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    callq log@PLT
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    callq log@PLT
; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %log = call <4 x double> @llvm.experimental.constrained.log.v4f64(
                             <4 x double> <double 42.0, double 42.1,
                                           double 42.2, double 42.3>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <4 x double> %log
}
2563
; Strict-FP log10 on a <1 x float> constant: must survive as a single log10f
; libcall (not constant-folded) in both the SSE and AVX pipelines.
define <1 x float> @constrained_vector_log10_v1f32() #0 {
; CHECK-LABEL: constrained_vector_log10_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq log10f@PLT
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_log10_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq log10f@PLT
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %log10 = call <1 x float> @llvm.experimental.constrained.log10.v1f32(
                             <1 x float> <float 42.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <1 x float> %log10
}
2591
; Strict-FP log10 on a <2 x double> constant: expands to two scalar log10
; libcalls recombined with (v)unpcklpd.
define <2 x double> @constrained_vector_log10_v2f64() #0 {
; CHECK-LABEL: constrained_vector_log10_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    callq log10@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    callq log10@PLT
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_log10_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    callq log10@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    callq log10@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %log10 = call <2 x double> @llvm.experimental.constrained.log10.v2f64(
                               <2 x double> <double 42.0, double 42.1>,
                               metadata !"round.dynamic",
                               metadata !"fpexcept.strict") #0
  ret <2 x double> %log10
}
2629
; Strict-FP log10 on a <3 x float> (odd-width) constant: three scalar log10f
; libcalls, lanes rebuilt via unpcklps/unpcklpd (SSE) or vinsertps (AVX).
define <3 x float> @constrained_vector_log10_v3f32() #0 {
; CHECK-LABEL: constrained_vector_log10_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq log10f@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq log10f@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq log10f@PLT
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_log10_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq log10f@PLT
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq log10f@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq log10f@PLT
; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %log10 = call <3 x float> @llvm.experimental.constrained.log10.v3f32(
                              <3 x float> <float 42.0, float 43.0, float 44.0>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %log10
}
2678
; Strict-FP log10 on a <3 x double> constant: three scalar log10 libcalls.
; The SSE path returns the third element on the x87 stack (fldl + `wait`);
; the AVX path packs all three into ymm0 via vinsertf128.
define <3 x double> @constrained_vector_log10_v3f64() #0 {
; CHECK-LABEL: constrained_vector_log10_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    callq log10@PLT
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    callq log10@PLT
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT:    callq log10@PLT
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_log10_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    callq log10@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    callq log10@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq log10@PLT
; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %log10 = call <3 x double> @llvm.experimental.constrained.log10.v3f64(
                          <3 x double> <double 42.0, double 42.1, double 42.2>,
                          metadata !"round.dynamic",
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %log10
}
2730
; Strict (constrained) log10 on <4 x double>: there is no vector libm routine,
; so legalization scalarizes into four log10 libcalls; AVX reassembles the
; 256-bit result with vinsertf128.
2731define <4 x double> @constrained_vector_log10_v4f64() #0 {
2732; CHECK-LABEL: constrained_vector_log10_v4f64:
2733; CHECK:       # %bb.0: # %entry
2734; CHECK-NEXT:    subq $40, %rsp
2735; CHECK-NEXT:    .cfi_def_cfa_offset 48
2736; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
2737; CHECK-NEXT:    callq log10@PLT
2738; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
2739; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
2740; CHECK-NEXT:    callq log10@PLT
2741; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2742; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
2743; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
2744; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
2745; CHECK-NEXT:    callq log10@PLT
2746; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2747; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
2748; CHECK-NEXT:    callq log10@PLT
2749; CHECK-NEXT:    movaps %xmm0, %xmm1
2750; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2751; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
2752; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
2753; CHECK-NEXT:    addq $40, %rsp
2754; CHECK-NEXT:    .cfi_def_cfa_offset 8
2755; CHECK-NEXT:    retq
2756;
2757; AVX-LABEL: constrained_vector_log10_v4f64:
2758; AVX:       # %bb.0: # %entry
2759; AVX-NEXT:    subq $40, %rsp
2760; AVX-NEXT:    .cfi_def_cfa_offset 48
2761; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
2762; AVX-NEXT:    callq log10@PLT
2763; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
2764; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
2765; AVX-NEXT:    callq log10@PLT
2766; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2767; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
2768; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
2769; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
2770; AVX-NEXT:    callq log10@PLT
2771; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2772; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
2773; AVX-NEXT:    callq log10@PLT
2774; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
2775; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
2776; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
2777; AVX-NEXT:    addq $40, %rsp
2778; AVX-NEXT:    .cfi_def_cfa_offset 8
2779; AVX-NEXT:    retq
2780entry:
2781  %log10 = call <4 x double> @llvm.experimental.constrained.log10.v4f64(
2782                               <4 x double> <double 42.0, double 42.1,
2783                                             double 42.2, double 42.3>,
2784                               metadata !"round.dynamic",
2785                               metadata !"fpexcept.strict") #0
2786  ret <4 x double> %log10
2787}
2788
; Strict log2 on <1 x float>: degenerates to a single log2f libcall on both
; SSE and AVX (the push/pop of %rax only keeps %rsp 16-byte aligned at the call).
2789define <1 x float> @constrained_vector_log2_v1f32() #0 {
2790; CHECK-LABEL: constrained_vector_log2_v1f32:
2791; CHECK:       # %bb.0: # %entry
2792; CHECK-NEXT:    pushq %rax
2793; CHECK-NEXT:    .cfi_def_cfa_offset 16
2794; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
2795; CHECK-NEXT:    callq log2f@PLT
2796; CHECK-NEXT:    popq %rax
2797; CHECK-NEXT:    .cfi_def_cfa_offset 8
2798; CHECK-NEXT:    retq
2799;
2800; AVX-LABEL: constrained_vector_log2_v1f32:
2801; AVX:       # %bb.0: # %entry
2802; AVX-NEXT:    pushq %rax
2803; AVX-NEXT:    .cfi_def_cfa_offset 16
2804; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
2805; AVX-NEXT:    callq log2f@PLT
2806; AVX-NEXT:    popq %rax
2807; AVX-NEXT:    .cfi_def_cfa_offset 8
2808; AVX-NEXT:    retq
2809entry:
2810  %log2 = call <1 x float> @llvm.experimental.constrained.log2.v1f32(
2811                             <1 x float> <float 42.0>,
2812                             metadata !"round.dynamic",
2813                             metadata !"fpexcept.strict") #0
2814  ret <1 x float> %log2
2815}
2816
; Strict log2 on <2 x double>: two scalar log2 libcalls, one result spilled
; across the second call, then merged with (v)unpcklpd.
2817define <2 x double> @constrained_vector_log2_v2f64() #0 {
2818; CHECK-LABEL: constrained_vector_log2_v2f64:
2819; CHECK:       # %bb.0: # %entry
2820; CHECK-NEXT:    subq $24, %rsp
2821; CHECK-NEXT:    .cfi_def_cfa_offset 32
2822; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
2823; CHECK-NEXT:    callq log2@PLT
2824; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
2825; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
2826; CHECK-NEXT:    callq log2@PLT
2827; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2828; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
2829; CHECK-NEXT:    addq $24, %rsp
2830; CHECK-NEXT:    .cfi_def_cfa_offset 8
2831; CHECK-NEXT:    retq
2832;
2833; AVX-LABEL: constrained_vector_log2_v2f64:
2834; AVX:       # %bb.0: # %entry
2835; AVX-NEXT:    subq $24, %rsp
2836; AVX-NEXT:    .cfi_def_cfa_offset 32
2837; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
2838; AVX-NEXT:    callq log2@PLT
2839; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
2840; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
2841; AVX-NEXT:    callq log2@PLT
2842; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2843; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
2844; AVX-NEXT:    addq $24, %rsp
2845; AVX-NEXT:    .cfi_def_cfa_offset 8
2846; AVX-NEXT:    retq
2847entry:
2848  %log2 = call <2 x double> @llvm.experimental.constrained.log2.v2f64(
2849                              <2 x double> <double 42.0, double 42.1>,
2850                              metadata !"round.dynamic",
2851                              metadata !"fpexcept.strict") #0
2852  ret <2 x double> %log2
2853}
2854
; Strict log2 on the odd-sized <3 x float>: three log2f libcalls; SSE merges
; the lanes with unpcklps/unpcklpd, AVX with vinsertps.
2855define <3 x float> @constrained_vector_log2_v3f32() #0 {
2856; CHECK-LABEL: constrained_vector_log2_v3f32:
2857; CHECK:       # %bb.0: # %entry
2858; CHECK-NEXT:    subq $40, %rsp
2859; CHECK-NEXT:    .cfi_def_cfa_offset 48
2860; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
2861; CHECK-NEXT:    callq log2f@PLT
2862; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2863; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
2864; CHECK-NEXT:    callq log2f@PLT
2865; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
2866; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
2867; CHECK-NEXT:    callq log2f@PLT
2868; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
2869; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2870; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2871; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
2872; CHECK-NEXT:    movaps %xmm1, %xmm0
2873; CHECK-NEXT:    addq $40, %rsp
2874; CHECK-NEXT:    .cfi_def_cfa_offset 8
2875; CHECK-NEXT:    retq
2876;
2877; AVX-LABEL: constrained_vector_log2_v3f32:
2878; AVX:       # %bb.0: # %entry
2879; AVX-NEXT:    subq $40, %rsp
2880; AVX-NEXT:    .cfi_def_cfa_offset 48
2881; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
2882; AVX-NEXT:    callq log2f@PLT
2883; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2884; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
2885; AVX-NEXT:    callq log2f@PLT
2886; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
2887; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
2888; AVX-NEXT:    callq log2f@PLT
2889; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
2890; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
2891; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
2892; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
2893; AVX-NEXT:    addq $40, %rsp
2894; AVX-NEXT:    .cfi_def_cfa_offset 8
2895; AVX-NEXT:    retq
2896entry:
2897  %log2 = call <3 x float> @llvm.experimental.constrained.log2.v3f32(
2898                              <3 x float> <float 42.0, float 43.0, float 44.0>,
2899                              metadata !"round.dynamic",
2900                              metadata !"fpexcept.strict") #0
2901  ret <3 x float> %log2
2902}
2903
; Strict log2 on <3 x double>: three log2 libcalls. The SSE ABI returns the
; third element on the x87 stack (fldl, with 'wait' for strict-FP exception
; ordering); AVX packs all three lanes into one ymm, issuing vzeroupper before
; the last libcall while a 32-byte ymm spill is live on the stack.
2904define <3 x double> @constrained_vector_log2_v3f64() #0 {
2905; CHECK-LABEL: constrained_vector_log2_v3f64:
2906; CHECK:       # %bb.0: # %entry
2907; CHECK-NEXT:    subq $24, %rsp
2908; CHECK-NEXT:    .cfi_def_cfa_offset 32
2909; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
2910; CHECK-NEXT:    callq log2@PLT
2911; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
2912; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
2913; CHECK-NEXT:    callq log2@PLT
2914; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
2915; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
2916; CHECK-NEXT:    callq log2@PLT
2917; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
2918; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
2919; CHECK-NEXT:    wait
2920; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
2921; CHECK-NEXT:    # xmm0 = mem[0],zero
2922; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
2923; CHECK-NEXT:    # xmm1 = mem[0],zero
2924; CHECK-NEXT:    addq $24, %rsp
2925; CHECK-NEXT:    .cfi_def_cfa_offset 8
2926; CHECK-NEXT:    retq
2927;
2928; AVX-LABEL: constrained_vector_log2_v3f64:
2929; AVX:       # %bb.0: # %entry
2930; AVX-NEXT:    subq $40, %rsp
2931; AVX-NEXT:    .cfi_def_cfa_offset 48
2932; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
2933; AVX-NEXT:    callq log2@PLT
2934; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
2935; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
2936; AVX-NEXT:    callq log2@PLT
2937; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2938; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
2939; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
2940; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
2941; AVX-NEXT:    vzeroupper
2942; AVX-NEXT:    callq log2@PLT
2943; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
2944; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
2945; AVX-NEXT:    addq $40, %rsp
2946; AVX-NEXT:    .cfi_def_cfa_offset 8
2947; AVX-NEXT:    retq
2948entry:
2949  %log2 = call <3 x double> @llvm.experimental.constrained.log2.v3f64(
2950                          <3 x double> <double 42.0, double 42.1, double 42.2>,
2951                          metadata !"round.dynamic",
2952                          metadata !"fpexcept.strict") #0
2953  ret <3 x double> %log2
2954}
2955
; Strict log2 on <4 x double>: scalarized into four log2 libcalls; the SSE ABI
; returns the value in xmm0/xmm1, AVX reassembles a ymm with vinsertf128.
2956define <4 x double> @constrained_vector_log2_v4f64() #0 {
2957; CHECK-LABEL: constrained_vector_log2_v4f64:
2958; CHECK:       # %bb.0: # %entry
2959; CHECK-NEXT:    subq $40, %rsp
2960; CHECK-NEXT:    .cfi_def_cfa_offset 48
2961; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
2962; CHECK-NEXT:    callq log2@PLT
2963; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
2964; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
2965; CHECK-NEXT:    callq log2@PLT
2966; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
2967; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
2968; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
2969; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
2970; CHECK-NEXT:    callq log2@PLT
2971; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2972; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
2973; CHECK-NEXT:    callq log2@PLT
2974; CHECK-NEXT:    movaps %xmm0, %xmm1
2975; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2976; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
2977; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
2978; CHECK-NEXT:    addq $40, %rsp
2979; CHECK-NEXT:    .cfi_def_cfa_offset 8
2980; CHECK-NEXT:    retq
2981;
2982; AVX-LABEL: constrained_vector_log2_v4f64:
2983; AVX:       # %bb.0: # %entry
2984; AVX-NEXT:    subq $40, %rsp
2985; AVX-NEXT:    .cfi_def_cfa_offset 48
2986; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
2987; AVX-NEXT:    callq log2@PLT
2988; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
2989; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
2990; AVX-NEXT:    callq log2@PLT
2991; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
2992; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
2993; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
2994; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
2995; AVX-NEXT:    callq log2@PLT
2996; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2997; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
2998; AVX-NEXT:    callq log2@PLT
2999; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
3000; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
3001; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
3002; AVX-NEXT:    addq $40, %rsp
3003; AVX-NEXT:    .cfi_def_cfa_offset 8
3004; AVX-NEXT:    retq
3005entry:
3006  %log2 = call <4 x double> @llvm.experimental.constrained.log2.v4f64(
3007                              <4 x double> <double 42.0, double 42.1,
3008                                            double 42.2, double 42.3>,
3009                              metadata !"round.dynamic",
3010                              metadata !"fpexcept.strict") #0
3011  ret <4 x double> %log2
3012}
3013
; Strict rint on a <1 x float> loaded from memory: plain SSE must use a rintf
; libcall; with AVX it lowers to vroundss with imm 4 (round using the current
; MXCSR mode, inexact exception not suppressed — matching rint semantics).
3014define <1 x float> @constrained_vector_rint_v1f32_var(ptr %a) #0 {
3015; CHECK-LABEL: constrained_vector_rint_v1f32_var:
3016; CHECK:       # %bb.0: # %entry
3017; CHECK-NEXT:    pushq %rax
3018; CHECK-NEXT:    .cfi_def_cfa_offset 16
3019; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
3020; CHECK-NEXT:    callq rintf@PLT
3021; CHECK-NEXT:    popq %rax
3022; CHECK-NEXT:    .cfi_def_cfa_offset 8
3023; CHECK-NEXT:    retq
3024;
3025; AVX-LABEL: constrained_vector_rint_v1f32_var:
3026; AVX:       # %bb.0: # %entry
3027; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
3028; AVX-NEXT:    vroundss $4, %xmm0, %xmm0, %xmm0
3029; AVX-NEXT:    retq
3030entry:
3031  %b = load <1 x float>, ptr %a
3032  %rint = call <1 x float> @llvm.experimental.constrained.rint.v1f32(
3033                             <1 x float> %b,
3034                             metadata !"round.dynamic",
3035                             metadata !"fpexcept.strict") #0
3036  ret <1 x float> %rint
3037}
3038
; Strict rint on a constant <2 x double>: SSE scalarizes into two rint
; libcalls; AVX folds the constant-pool load into a single vroundpd $4.
3039define <2 x double> @constrained_vector_rint_v2f64() #0 {
3040; CHECK-LABEL: constrained_vector_rint_v2f64:
3041; CHECK:       # %bb.0: # %entry
3042; CHECK-NEXT:    subq $24, %rsp
3043; CHECK-NEXT:    .cfi_def_cfa_offset 32
3044; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
3045; CHECK-NEXT:    callq rint@PLT
3046; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
3047; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
3048; CHECK-NEXT:    callq rint@PLT
3049; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
3050; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
3051; CHECK-NEXT:    addq $24, %rsp
3052; CHECK-NEXT:    .cfi_def_cfa_offset 8
3053; CHECK-NEXT:    retq
3054;
3055; AVX-LABEL: constrained_vector_rint_v2f64:
3056; AVX:       # %bb.0: # %entry
3057; AVX-NEXT:    vroundpd $4, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3058; AVX-NEXT:    retq
3059entry:
3060  %rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
3061                        <2 x double> <double 42.1, double 42.0>,
3062                        metadata !"round.dynamic",
3063                        metadata !"fpexcept.strict") #0
3064  ret <2 x double> %rint
3065}
3066
; Strict rint on a <2 x double> loaded from memory: SSE scalarizes into two
; rint libcalls; AVX folds the load into a single vroundpd $4 from (%rdi).
3067define <2 x double> @constrained_vector_rint_v2f64_var(ptr %a) #0 {
3068; CHECK-LABEL: constrained_vector_rint_v2f64_var:
3069; CHECK:       # %bb.0: # %entry
3070; CHECK-NEXT:    subq $40, %rsp
3071; CHECK-NEXT:    .cfi_def_cfa_offset 48
3072; CHECK-NEXT:    movaps (%rdi), %xmm0
3073; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
3074; CHECK-NEXT:    callq rint@PLT
3075; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
3076; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
3077; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
3078; CHECK-NEXT:    callq rint@PLT
3079; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
3080; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
3081; CHECK-NEXT:    movaps %xmm1, %xmm0
3082; CHECK-NEXT:    addq $40, %rsp
3083; CHECK-NEXT:    .cfi_def_cfa_offset 8
3084; CHECK-NEXT:    retq
3085;
3086; AVX-LABEL: constrained_vector_rint_v2f64_var:
3087; AVX:       # %bb.0: # %entry
3088; AVX-NEXT:    vroundpd $4, (%rdi), %xmm0
3089; AVX-NEXT:    retq
3090entry:
3091  %b = load <2 x double>, ptr %a
3092  %rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
3093                        <2 x double> %b,
3094                        metadata !"round.dynamic",
3095                        metadata !"fpexcept.strict") #0
3096  ret <2 x double> %rint
3097}
3098
; Strict rint on a loaded <3 x float>: SSE needs three rintf libcalls with
; lane extraction/reassembly; AVX does three scalar vroundss $4 and merges the
; lanes with vinsertps (no vector rint libcall, and odd width blocks vroundps).
3099define <3 x float> @constrained_vector_rint_v3f32_var(ptr %a) #0 {
3100; CHECK-LABEL: constrained_vector_rint_v3f32_var:
3101; CHECK:       # %bb.0: # %entry
3102; CHECK-NEXT:    subq $56, %rsp
3103; CHECK-NEXT:    .cfi_def_cfa_offset 64
3104; CHECK-NEXT:    movaps (%rdi), %xmm0
3105; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
3106; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
3107; CHECK-NEXT:    callq rintf@PLT
3108; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
3109; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
3110; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
3111; CHECK-NEXT:    callq rintf@PLT
3112; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
3113; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
3114; CHECK-NEXT:    callq rintf@PLT
3115; CHECK-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
3116; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
3117; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
3118; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
3119; CHECK-NEXT:    addq $56, %rsp
3120; CHECK-NEXT:    .cfi_def_cfa_offset 8
3121; CHECK-NEXT:    retq
3122;
3123; AVX-LABEL: constrained_vector_rint_v3f32_var:
3124; AVX:       # %bb.0: # %entry
3125; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
3126; AVX-NEXT:    vroundss $4, %xmm0, %xmm0, %xmm0
3127; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
3128; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
3129; AVX-NEXT:    vroundss $4, %xmm1, %xmm1, %xmm1
3130; AVX-NEXT:    vroundss $4, %xmm2, %xmm2, %xmm2
3131; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
3132; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
3133; AVX-NEXT:    retq
3134 entry:
3135  %b = load <3 x float>, ptr %a
3136  %rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32(
3137                              <3 x float> %b,
3138                              metadata !"round.dynamic",
3139                              metadata !"fpexcept.strict") #0
3140  ret <3 x float> %rint
3141}
3142
; Strict rint on a constant <3 x double>: SSE scalarizes into three rint
; libcalls (third element returned on the x87 stack via fldl + wait); AVX uses
; vroundsd $4 for the odd lane plus a constant-folded vroundpd $4.
3143define <3 x double> @constrained_vector_rint_v3f64() #0 {
3144; CHECK-LABEL: constrained_vector_rint_v3f64:
3145; CHECK:       # %bb.0: # %entry
3146; CHECK-NEXT:    subq $24, %rsp
3147; CHECK-NEXT:    .cfi_def_cfa_offset 32
3148; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
3149; CHECK-NEXT:    callq rint@PLT
3150; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
3151; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
3152; CHECK-NEXT:    callq rint@PLT
3153; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
3154; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
3155; CHECK-NEXT:    callq rint@PLT
3156; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
3157; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
3158; CHECK-NEXT:    wait
3159; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
3160; CHECK-NEXT:    # xmm0 = mem[0],zero
3161; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
3162; CHECK-NEXT:    # xmm1 = mem[0],zero
3163; CHECK-NEXT:    addq $24, %rsp
3164; CHECK-NEXT:    .cfi_def_cfa_offset 8
3165; CHECK-NEXT:    retq
3166;
3167; AVX-LABEL: constrained_vector_rint_v3f64:
3168; AVX:       # %bb.0: # %entry
3169; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
3170; AVX-NEXT:    vroundsd $4, %xmm0, %xmm0, %xmm0
3171; AVX-NEXT:    vroundpd $4, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3172; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3173; AVX-NEXT:    retq
3174entry:
3175  %rint = call <3 x double> @llvm.experimental.constrained.rint.v3f64(
3176                          <3 x double> <double 42.0, double 42.1, double 42.2>,
3177                          metadata !"round.dynamic",
3178                          metadata !"fpexcept.strict") #0
3179  ret <3 x double> %rint
3180}
3181
; Strict rint on a loaded <3 x double>: SSE makes three rint libcalls and
; returns lane 2 via the x87 stack (fldl + wait); AVX uses vroundsd $4 for the
; scalar tail plus a memory-folded vroundpd $4 for the low pair.
3182define <3 x double> @constrained_vector_rint_v3f64_var(ptr %a) #0 {
3183; CHECK-LABEL: constrained_vector_rint_v3f64_var:
3184; CHECK:       # %bb.0: # %entry
3185; CHECK-NEXT:    subq $40, %rsp
3186; CHECK-NEXT:    .cfi_def_cfa_offset 48
3187; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
3188; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
3189; CHECK-NEXT:    movaps (%rdi), %xmm0
3190; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
3191; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
3192; CHECK-NEXT:    callq rint@PLT
3193; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
3194; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
3195; CHECK-NEXT:    callq rint@PLT
3196; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
3197; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
3198; CHECK-NEXT:    # xmm0 = mem[0],zero
3199; CHECK-NEXT:    callq rint@PLT
3200; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
3201; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
3202; CHECK-NEXT:    wait
3203; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
3204; CHECK-NEXT:    # xmm0 = mem[0],zero
3205; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
3206; CHECK-NEXT:    # xmm1 = mem[0],zero
3207; CHECK-NEXT:    addq $40, %rsp
3208; CHECK-NEXT:    .cfi_def_cfa_offset 8
3209; CHECK-NEXT:    retq
3210;
3211; AVX-LABEL: constrained_vector_rint_v3f64_var:
3212; AVX:       # %bb.0: # %entry
3213; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
3214; AVX-NEXT:    vroundsd $4, %xmm0, %xmm0, %xmm0
3215; AVX-NEXT:    vroundpd $4, (%rdi), %xmm1
3216; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3217; AVX-NEXT:    retq
3218entry:
3219  %b = load <3 x double>, ptr %a
3220  %rint = call <3 x double> @llvm.experimental.constrained.rint.v3f64(
3221                          <3 x double> %b,
3222                          metadata !"round.dynamic",
3223                          metadata !"fpexcept.strict") #0
3224  ret <3 x double> %rint
3225}
3226
; Strict rint on a constant <4 x double>: SSE scalarizes into four rint
; libcalls; AVX collapses the whole thing to one 256-bit vroundpd $4 on the
; constant pool.
3227define <4 x double> @constrained_vector_rint_v4f64() #0 {
3228; CHECK-LABEL: constrained_vector_rint_v4f64:
3229; CHECK:       # %bb.0: # %entry
3230; CHECK-NEXT:    subq $40, %rsp
3231; CHECK-NEXT:    .cfi_def_cfa_offset 48
3232; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
3233; CHECK-NEXT:    callq rint@PLT
3234; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
3235; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
3236; CHECK-NEXT:    callq rint@PLT
3237; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
3238; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
3239; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
3240; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2399999999999999E+1,0.0E+0]
3241; CHECK-NEXT:    callq rint@PLT
3242; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
3243; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
3244; CHECK-NEXT:    callq rint@PLT
3245; CHECK-NEXT:    movaps %xmm0, %xmm1
3246; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
3247; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
3248; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
3249; CHECK-NEXT:    addq $40, %rsp
3250; CHECK-NEXT:    .cfi_def_cfa_offset 8
3251; CHECK-NEXT:    retq
3252;
3253; AVX-LABEL: constrained_vector_rint_v4f64:
3254; AVX:       # %bb.0: # %entry
3255; AVX-NEXT:    vroundpd $4, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
3256; AVX-NEXT:    retq
3257entry:
3258  %rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64(
3259                        <4 x double> <double 42.1, double 42.2,
3260                                      double 42.3, double 42.4>,
3261                        metadata !"round.dynamic",
3262                        metadata !"fpexcept.strict") #0
3263  ret <4 x double> %rint
3264}
3265
; Strict rint on a loaded <4 x double>: SSE needs four rint libcalls with
; spill/reload choreography; AVX folds the load into one vroundpd $4 (%rdi).
3266define <4 x double> @constrained_vector_rint_v4f64_var(ptr %a) #0 {
3267; CHECK-LABEL: constrained_vector_rint_v4f64_var:
3268; CHECK:       # %bb.0: # %entry
3269; CHECK-NEXT:    subq $56, %rsp
3270; CHECK-NEXT:    .cfi_def_cfa_offset 64
3271; CHECK-NEXT:    movaps (%rdi), %xmm1
3272; CHECK-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
3273; CHECK-NEXT:    movaps 16(%rdi), %xmm0
3274; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
3275; CHECK-NEXT:    movaps %xmm1, %xmm0
3276; CHECK-NEXT:    callq rint@PLT
3277; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
3278; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
3279; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
3280; CHECK-NEXT:    callq rint@PLT
3281; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
3282; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
3283; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
3284; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
3285; CHECK-NEXT:    callq rint@PLT
3286; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
3287; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
3288; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
3289; CHECK-NEXT:    callq rint@PLT
3290; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
3291; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
3292; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
3293; CHECK-NEXT:    addq $56, %rsp
3294; CHECK-NEXT:    .cfi_def_cfa_offset 8
3295; CHECK-NEXT:    retq
3296;
3297; AVX-LABEL: constrained_vector_rint_v4f64_var:
3298; AVX:       # %bb.0: # %entry
3299; AVX-NEXT:    vroundpd $4, (%rdi), %ymm0
3300; AVX-NEXT:    retq
3301entry:
3302  %b = load <4 x double>, ptr %a
3303  %rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64(
3304                        <4 x double> %b,
3305                        metadata !"round.dynamic",
3306                        metadata !"fpexcept.strict") #0
3307  ret <4 x double> %rint
3308}
3309
; Strict nearbyint on a loaded <1 x float>: like rint but must not raise the
; inexact exception, so AVX uses round imm 12 (MXCSR rounding mode with
; exception suppression) instead of imm 4; SSE falls back to nearbyintf.
3310define <1 x float> @constrained_vector_nearbyint_v1f32_var(ptr %a) #0 {
3311; CHECK-LABEL: constrained_vector_nearbyint_v1f32_var:
3312; CHECK:       # %bb.0: # %entry
3313; CHECK-NEXT:    pushq %rax
3314; CHECK-NEXT:    .cfi_def_cfa_offset 16
3315; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
3316; CHECK-NEXT:    callq nearbyintf@PLT
3317; CHECK-NEXT:    popq %rax
3318; CHECK-NEXT:    .cfi_def_cfa_offset 8
3319; CHECK-NEXT:    retq
3320;
3321; AVX-LABEL: constrained_vector_nearbyint_v1f32_var:
3322; AVX:       # %bb.0: # %entry
3323; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
3324; AVX-NEXT:    vroundss $12, %xmm0, %xmm0, %xmm0
3325; AVX-NEXT:    retq
3326entry:
3327  %b = load <1 x float>, ptr %a
3328  %nearby = call <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(
3329                               <1 x float> %b,
3330                               metadata !"round.dynamic",
3331                               metadata !"fpexcept.strict") #0
3332  ret <1 x float> %nearby
3333}
3334
; Strict nearbyint on a constant <2 x double>: SSE scalarizes into two
; nearbyint libcalls; AVX folds the constant load into one vroundpd $12.
3335define <2 x double> @constrained_vector_nearbyint_v2f64() #0 {
3336; CHECK-LABEL: constrained_vector_nearbyint_v2f64:
3337; CHECK:       # %bb.0: # %entry
3338; CHECK-NEXT:    subq $24, %rsp
3339; CHECK-NEXT:    .cfi_def_cfa_offset 32
3340; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
3341; CHECK-NEXT:    callq nearbyint@PLT
3342; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
3343; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
3344; CHECK-NEXT:    callq nearbyint@PLT
3345; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
3346; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
3347; CHECK-NEXT:    addq $24, %rsp
3348; CHECK-NEXT:    .cfi_def_cfa_offset 8
3349; CHECK-NEXT:    retq
3350;
3351; AVX-LABEL: constrained_vector_nearbyint_v2f64:
3352; AVX:       # %bb.0: # %entry
3353; AVX-NEXT:    vroundpd $12, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3354; AVX-NEXT:    retq
3355entry:
3356  %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
3357                                <2 x double> <double 42.1, double 42.0>,
3358                                metadata !"round.dynamic",
3359                                metadata !"fpexcept.strict") #0
3360  ret <2 x double> %nearby
3361}
3362
; Strict nearbyint on a loaded <2 x double>: SSE scalarizes into two
; nearbyint libcalls; AVX folds the load into a single vroundpd $12 (%rdi).
3363define <2 x double> @constrained_vector_nearbyint_v2f64_var(ptr %a) #0 {
3364; CHECK-LABEL: constrained_vector_nearbyint_v2f64_var:
3365; CHECK:       # %bb.0: # %entry
3366; CHECK-NEXT:    subq $40, %rsp
3367; CHECK-NEXT:    .cfi_def_cfa_offset 48
3368; CHECK-NEXT:    movaps (%rdi), %xmm0
3369; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
3370; CHECK-NEXT:    callq nearbyint@PLT
3371; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
3372; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
3373; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
3374; CHECK-NEXT:    callq nearbyint@PLT
3375; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
3376; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
3377; CHECK-NEXT:    movaps %xmm1, %xmm0
3378; CHECK-NEXT:    addq $40, %rsp
3379; CHECK-NEXT:    .cfi_def_cfa_offset 8
3380; CHECK-NEXT:    retq
3381;
3382; AVX-LABEL: constrained_vector_nearbyint_v2f64_var:
3383; AVX:       # %bb.0: # %entry
3384; AVX-NEXT:    vroundpd $12, (%rdi), %xmm0
3385; AVX-NEXT:    retq
3386entry:
3387  %b = load <2 x double>, ptr %a
3388  %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
3389                                <2 x double> %b,
3390                                metadata !"round.dynamic",
3391                                metadata !"fpexcept.strict") #0
3392  ret <2 x double> %nearby
3393}
3394
; Strict nearbyint on a loaded <3 x float>: SSE makes three nearbyintf
; libcalls with lane shuffles; AVX does three scalar vroundss $12 and
; reassembles the vector with vinsertps.
3395define <3 x float> @constrained_vector_nearbyint_v3f32_var(ptr %a) #0 {
3396; CHECK-LABEL: constrained_vector_nearbyint_v3f32_var:
3397; CHECK:       # %bb.0: # %entry
3398; CHECK-NEXT:    subq $56, %rsp
3399; CHECK-NEXT:    .cfi_def_cfa_offset 64
3400; CHECK-NEXT:    movaps (%rdi), %xmm0
3401; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
3402; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
3403; CHECK-NEXT:    callq nearbyintf@PLT
3404; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
3405; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
3406; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
3407; CHECK-NEXT:    callq nearbyintf@PLT
3408; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
3409; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
3410; CHECK-NEXT:    callq nearbyintf@PLT
3411; CHECK-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
3412; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
3413; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
3414; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
3415; CHECK-NEXT:    addq $56, %rsp
3416; CHECK-NEXT:    .cfi_def_cfa_offset 8
3417; CHECK-NEXT:    retq
3418;
3419; AVX-LABEL: constrained_vector_nearbyint_v3f32_var:
3420; AVX:       # %bb.0: # %entry
3421; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
3422; AVX-NEXT:    vroundss $12, %xmm0, %xmm0, %xmm0
3423; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
3424; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
3425; AVX-NEXT:    vroundss $12, %xmm1, %xmm1, %xmm1
3426; AVX-NEXT:    vroundss $12, %xmm2, %xmm2, %xmm2
3427; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
3428; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
3429; AVX-NEXT:    retq
3430entry:
3431  %b = load <3 x float>, ptr %a
3432  %nearby = call <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(
3433                              <3 x float> %b,
3434                              metadata !"round.dynamic",
3435                              metadata !"fpexcept.strict") #0
3436  ret <3 x float> %nearby
3437}
3438
3439define <3 x double> @constrained_vector_nearby_v3f64() #0 {
3440; CHECK-LABEL: constrained_vector_nearby_v3f64:
3441; CHECK:       # %bb.0: # %entry
3442; CHECK-NEXT:    subq $24, %rsp
3443; CHECK-NEXT:    .cfi_def_cfa_offset 32
3444; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
3445; CHECK-NEXT:    callq nearbyint@PLT
3446; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
3447; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
3448; CHECK-NEXT:    callq nearbyint@PLT
3449; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
3450; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
3451; CHECK-NEXT:    callq nearbyint@PLT
3452; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
3453; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
3454; CHECK-NEXT:    wait
3455; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
3456; CHECK-NEXT:    # xmm0 = mem[0],zero
3457; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
3458; CHECK-NEXT:    # xmm1 = mem[0],zero
3459; CHECK-NEXT:    addq $24, %rsp
3460; CHECK-NEXT:    .cfi_def_cfa_offset 8
3461; CHECK-NEXT:    retq
3462;
3463; AVX-LABEL: constrained_vector_nearby_v3f64:
3464; AVX:       # %bb.0: # %entry
3465; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
3466; AVX-NEXT:    vroundsd $12, %xmm0, %xmm0, %xmm0
3467; AVX-NEXT:    vroundpd $12, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3468; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3469; AVX-NEXT:    retq
; Strict-FP nearbyint of a constant <3 x double> must not be constant-folded
; (it can observe the dynamic rounding mode and raise FP exceptions): the base
; SSE path scalarizes to three nearbyint libcalls, while the AVX path emits
; vroundpd/vroundsd with immediate 12.
entry:
3471  %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(
3472                          <3 x double> <double 42.0, double 42.1, double 42.2>,
3473                          metadata !"round.dynamic",
3474                          metadata !"fpexcept.strict") #0
3475  ret <3 x double> %nearby
3476}
3477
3478define <3 x double> @constrained_vector_nearbyint_v3f64_var(ptr %a) #0 {
3479; CHECK-LABEL: constrained_vector_nearbyint_v3f64_var:
3480; CHECK:       # %bb.0: # %entry
3481; CHECK-NEXT:    subq $40, %rsp
3482; CHECK-NEXT:    .cfi_def_cfa_offset 48
3483; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
3484; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
3485; CHECK-NEXT:    movaps (%rdi), %xmm0
3486; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
3487; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
3488; CHECK-NEXT:    callq nearbyint@PLT
3489; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
3490; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
3491; CHECK-NEXT:    callq nearbyint@PLT
3492; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
3493; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
3494; CHECK-NEXT:    # xmm0 = mem[0],zero
3495; CHECK-NEXT:    callq nearbyint@PLT
3496; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
3497; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
3498; CHECK-NEXT:    wait
3499; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
3500; CHECK-NEXT:    # xmm0 = mem[0],zero
3501; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
3502; CHECK-NEXT:    # xmm1 = mem[0],zero
3503; CHECK-NEXT:    addq $40, %rsp
3504; CHECK-NEXT:    .cfi_def_cfa_offset 8
3505; CHECK-NEXT:    retq
3506;
3507; AVX-LABEL: constrained_vector_nearbyint_v3f64_var:
3508; AVX:       # %bb.0: # %entry
3509; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
3510; AVX-NEXT:    vroundsd $12, %xmm0, %xmm0, %xmm0
3511; AVX-NEXT:    vroundpd $12, (%rdi), %xmm1
3512; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3513; AVX-NEXT:    retq
; Same as the constant v3f64 case but with the operand loaded from memory:
; the odd element count forces a split into a <2 x double> part plus a scalar,
; visible as vroundpd on the low pair and vroundsd on the third element (AVX),
; or three nearbyint libcalls on base SSE.
entry:
3515  %b = load <3 x double>, ptr %a
3516  %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(
3517                          <3 x double> %b,
3518                          metadata !"round.dynamic",
3519                          metadata !"fpexcept.strict") #0
3520  ret <3 x double> %nearby
3521}
3522
3523define <4 x double> @constrained_vector_nearbyint_v4f64() #0 {
3524; CHECK-LABEL: constrained_vector_nearbyint_v4f64:
3525; CHECK:       # %bb.0: # %entry
3526; CHECK-NEXT:    subq $40, %rsp
3527; CHECK-NEXT:    .cfi_def_cfa_offset 48
3528; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
3529; CHECK-NEXT:    callq nearbyint@PLT
3530; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
3531; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
3532; CHECK-NEXT:    callq nearbyint@PLT
3533; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
3534; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
3535; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
3536; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2399999999999999E+1,0.0E+0]
3537; CHECK-NEXT:    callq nearbyint@PLT
3538; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
3539; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
3540; CHECK-NEXT:    callq nearbyint@PLT
3541; CHECK-NEXT:    movaps %xmm0, %xmm1
3542; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
3543; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
3544; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
3545; CHECK-NEXT:    addq $40, %rsp
3546; CHECK-NEXT:    .cfi_def_cfa_offset 8
3547; CHECK-NEXT:    retq
3548;
3549; AVX-LABEL: constrained_vector_nearbyint_v4f64:
3550; AVX:       # %bb.0: # %entry
3551; AVX-NEXT:    vroundpd $12, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
3552; AVX-NEXT:    retq
; Full-width <4 x double> strict nearbyint: base SSE scalarizes to four
; nearbyint libcalls recombined with unpcklpd, whereas AVX lowers the entire
; vector to a single 256-bit vroundpd with immediate 12.
entry:
3554  %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(
3555                                <4 x double> <double 42.1, double 42.2,
3556                                              double 42.3, double 42.4>,
3557                                metadata !"round.dynamic",
3558                                metadata !"fpexcept.strict") #0
3559  ret <4 x double> %nearby
3560}
3561
3562define <4 x double> @constrained_vector_nearbyint_v4f64_var(ptr %a) #0 {
3563; CHECK-LABEL: constrained_vector_nearbyint_v4f64_var:
3564; CHECK:       # %bb.0: # %entry
3565; CHECK-NEXT:    subq $56, %rsp
3566; CHECK-NEXT:    .cfi_def_cfa_offset 64
3567; CHECK-NEXT:    movaps (%rdi), %xmm1
3568; CHECK-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
3569; CHECK-NEXT:    movaps 16(%rdi), %xmm0
3570; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
3571; CHECK-NEXT:    movaps %xmm1, %xmm0
3572; CHECK-NEXT:    callq nearbyint@PLT
3573; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
3574; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
3575; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
3576; CHECK-NEXT:    callq nearbyint@PLT
3577; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
3578; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
3579; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
3580; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
3581; CHECK-NEXT:    callq nearbyint@PLT
3582; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
3583; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
3584; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
3585; CHECK-NEXT:    callq nearbyint@PLT
3586; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
3587; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
3588; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
3589; CHECK-NEXT:    addq $56, %rsp
3590; CHECK-NEXT:    .cfi_def_cfa_offset 8
3591; CHECK-NEXT:    retq
3592;
3593; AVX-LABEL: constrained_vector_nearbyint_v4f64_var:
3594; AVX:       # %bb.0: # %entry
3595; AVX-NEXT:    vroundpd $12, (%rdi), %ymm0
3596; AVX-NEXT:    retq
; Memory-operand variant of the v4f64 case: AVX folds the load straight into
; a single 256-bit vroundpd, while base SSE loads both 128-bit halves and
; issues four scalar nearbyint libcalls.
entry:
3598  %b = load <4 x double>, ptr %a
3599  %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(
3600                                <4 x double> %b,
3601                                metadata !"round.dynamic",
3602                                metadata !"fpexcept.strict") #0
3603  ret <4 x double> %nearby
3604}
3605
3606define <1 x float> @constrained_vector_maxnum_v1f32() #0 {
3607; CHECK-LABEL: constrained_vector_maxnum_v1f32:
3608; CHECK:       # %bb.0: # %entry
3609; CHECK-NEXT:    pushq %rax
3610; CHECK-NEXT:    .cfi_def_cfa_offset 16
3611; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
3612; CHECK-NEXT:    movss {{.*#+}} xmm1 = [4.1E+1,0.0E+0,0.0E+0,0.0E+0]
3613; CHECK-NEXT:    callq fmaxf@PLT
3614; CHECK-NEXT:    popq %rax
3615; CHECK-NEXT:    .cfi_def_cfa_offset 8
3616; CHECK-NEXT:    retq
3617;
3618; AVX-LABEL: constrained_vector_maxnum_v1f32:
3619; AVX:       # %bb.0: # %entry
3620; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
3621; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [4.1E+1,0.0E+0,0.0E+0,0.0E+0]
3622; AVX-NEXT:    callq fmaxf@PLT
3623; AVX-NEXT:    popq %rax
3624; AVX-NEXT:    .cfi_def_cfa_offset 8
3625; AVX-NEXT:    retq
; Strict maxnum on a single-element float vector: lowered to an fmaxf libcall
; on both SSE and AVX (maxss has different NaN semantics than fmax, so it is
; not used here — TODO confirm that rationale against the lowering code).
; Note maxnum takes only the fpexcept metadata, no rounding-mode operand.
entry:
3629  %max = call <1 x float> @llvm.experimental.constrained.maxnum.v1f32(
3630                               <1 x float> <float 42.0>, <1 x float> <float 41.0>,
3631                               metadata !"fpexcept.strict") #0
3632  ret <1 x float> %max
3633}
3634
3635define <2 x double> @constrained_vector_maxnum_v2f64() #0 {
3636; CHECK-LABEL: constrained_vector_maxnum_v2f64:
3637; CHECK:       # %bb.0: # %entry
3638; CHECK-NEXT:    subq $24, %rsp
3639; CHECK-NEXT:    .cfi_def_cfa_offset 32
3640; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
3641; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [4.0E+1,0.0E+0]
3642; CHECK-NEXT:    callq fmax@PLT
3643; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
3644; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.3E+1,0.0E+0]
3645; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [4.1E+1,0.0E+0]
3646; CHECK-NEXT:    callq fmax@PLT
3647; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
3648; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
3649; CHECK-NEXT:    addq $24, %rsp
3650; CHECK-NEXT:    .cfi_def_cfa_offset 8
3651; CHECK-NEXT:    retq
3652;
3653; AVX-LABEL: constrained_vector_maxnum_v2f64:
3654; AVX:       # %bb.0: # %entry
3655; AVX-NEXT:    subq $24, %rsp
3656; AVX-NEXT:    .cfi_def_cfa_offset 32
3657; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
3658; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.0E+1,0.0E+0]
3659; AVX-NEXT:    callq fmax@PLT
3660; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
3661; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.3E+1,0.0E+0]
3662; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.1E+1,0.0E+0]
3663; AVX-NEXT:    callq fmax@PLT
3664; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
3665; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
3666; AVX-NEXT:    addq $24, %rsp
3667; AVX-NEXT:    .cfi_def_cfa_offset 8
3668; AVX-NEXT:    retq
; Strict maxnum of a constant <2 x double>: scalarized to two fmax libcalls
; and recombined with unpcklpd on both register files (no vector instruction
; with maxnum semantics is selected).
entry:
3670  %max = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(
3671                                <2 x double> <double 43.0, double 42.0>,
3672                                <2 x double> <double 41.0, double 40.0>,
3673                                metadata !"fpexcept.strict") #0
3674  ret <2 x double> %max
3675}
3676
3677define <3 x float> @constrained_vector_maxnum_v3f32() #0 {
3678; CHECK-LABEL: constrained_vector_maxnum_v3f32:
3679; CHECK:       # %bb.0: # %entry
3680; CHECK-NEXT:    subq $40, %rsp
3681; CHECK-NEXT:    .cfi_def_cfa_offset 48
3682; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.5E+1,0.0E+0,0.0E+0,0.0E+0]
3683; CHECK-NEXT:    movss {{.*#+}} xmm1 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
3684; CHECK-NEXT:    callq fmaxf@PLT
3685; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
3686; CHECK-NEXT:    movss {{.*#+}} xmm1 = [4.1E+1,0.0E+0,0.0E+0,0.0E+0]
3687; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
3688; CHECK-NEXT:    callq fmaxf@PLT
3689; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
3690; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
3691; CHECK-NEXT:    movss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
3692; CHECK-NEXT:    callq fmaxf@PLT
3693; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
3694; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
3695; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
3696; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
3697; CHECK-NEXT:    movaps %xmm1, %xmm0
3698; CHECK-NEXT:    addq $40, %rsp
3699; CHECK-NEXT:    .cfi_def_cfa_offset 8
3700; CHECK-NEXT:    retq
3701;
3702; AVX-LABEL: constrained_vector_maxnum_v3f32:
3703; AVX:       # %bb.0: # %entry
3704; AVX-NEXT:    subq $40, %rsp
3705; AVX-NEXT:    .cfi_def_cfa_offset 48
3706; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.5E+1,0.0E+0,0.0E+0,0.0E+0]
3707; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
3708; AVX-NEXT:    callq fmaxf@PLT
3709; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
3710; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [4.1E+1,0.0E+0,0.0E+0,0.0E+0]
3711; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
3712; AVX-NEXT:    callq fmaxf@PLT
3713; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
3714; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
3715; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
3716; AVX-NEXT:    callq fmaxf@PLT
3717; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
3718; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
3719; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
3720; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
3721; AVX-NEXT:    retq appears below after stack restore
; (the two preceding directive lines rebuild the vector with vinsertps)
3721; AVX-NEXT is not duplicated; see lines above
entry:
3725  %max = call <3 x float> @llvm.experimental.constrained.maxnum.v3f32(
3726                              <3 x float> <float 43.0, float 44.0, float 45.0>,
3727                              <3 x float> <float 41.0, float 42.0, float 43.0>,
3728                              metadata !"fpexcept.strict") #0
3729  ret <3 x float> %max
3730}
3731
3732define <3 x double> @constrained_vector_max_v3f64() #0 {
3733; CHECK-LABEL: constrained_vector_max_v3f64:
3734; CHECK:       # %bb.0: # %entry
3735; CHECK-NEXT:    subq $24, %rsp
3736; CHECK-NEXT:    .cfi_def_cfa_offset 32
3737; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.4E+1,0.0E+0]
3738; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [4.1E+1,0.0E+0]
3739; CHECK-NEXT:    callq fmax@PLT
3740; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
3741; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.3E+1,0.0E+0]
3742; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [4.0E+1,0.0E+0]
3743; CHECK-NEXT:    callq fmax@PLT
3744; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
3745; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.5E+1,0.0E+0]
3746; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0]
3747; CHECK-NEXT:    callq fmax@PLT
3748; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
3749; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
3750; CHECK-NEXT:    wait
3751; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
3752; CHECK-NEXT:    # xmm0 = mem[0],zero
3753; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
3754; CHECK-NEXT:    # xmm1 = mem[0],zero
3755; CHECK-NEXT:    addq $24, %rsp
3756; CHECK-NEXT:    .cfi_def_cfa_offset 8
3757; CHECK-NEXT:    retq
3758;
3759; AVX-LABEL: constrained_vector_max_v3f64:
3760; AVX:       # %bb.0: # %entry
3761; AVX-NEXT:    subq $40, %rsp
3762; AVX-NEXT:    .cfi_def_cfa_offset 48
3763; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.4E+1,0.0E+0]
3764; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.1E+1,0.0E+0]
3765; AVX-NEXT:    callq fmax@PLT
3766; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
3767; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.3E+1,0.0E+0]
3768; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.0E+1,0.0E+0]
3769; AVX-NEXT:    callq fmax@PLT
3770; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
3771; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
3772; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
3773; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.5E+1,0.0E+0]
3774; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0]
3775; AVX-NEXT:    vzeroupper
3776; AVX-NEXT:    callq fmax@PLT
3777; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
3778; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
3779; AVX-NEXT:    addq $40, %rsp
3780; AVX-NEXT:    .cfi_def_cfa_offset 8
3781; AVX-NEXT:    retq
; Strict maxnum on <3 x double>: three fmax libcalls. The SSE path returns the
; third element on the x87 stack (fldl/wait), matching the SysV return for a
; <3 x double>; the AVX path inserts it into the upper ymm lane and issues
; vzeroupper before the final libcall as the ABI expects.
entry:
3783  %max = call <3 x double> @llvm.experimental.constrained.maxnum.v3f64(
3784                          <3 x double> <double 43.0, double 44.0, double 45.0>,
3785                          <3 x double> <double 40.0, double 41.0, double 42.0>,
3786                          metadata !"fpexcept.strict") #0
3787  ret <3 x double> %max
3788}
3789
3790define <4 x double> @constrained_vector_maxnum_v4f64() #0 {
3791; CHECK-LABEL: constrained_vector_maxnum_v4f64:
3792; CHECK:       # %bb.0: # %entry
3793; CHECK-NEXT:    subq $40, %rsp
3794; CHECK-NEXT:    .cfi_def_cfa_offset 48
3795; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.5E+1,0.0E+0]
3796; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [4.1E+1,0.0E+0]
3797; CHECK-NEXT:    callq fmax@PLT
3798; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
3799; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.4E+1,0.0E+0]
3800; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [4.0E+1,0.0E+0]
3801; CHECK-NEXT:    callq fmax@PLT
3802; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
3803; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
3804; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
3805; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.7E+1,0.0E+0]
3806; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [4.3E+1,0.0E+0]
3807; CHECK-NEXT:    callq fmax@PLT
3808; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
3809; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.6E+1,0.0E+0]
3810; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0]
3811; CHECK-NEXT:    callq fmax@PLT
3812; CHECK-NEXT:    movaps %xmm0, %xmm1
3813; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
3814; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
3815; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
3816; CHECK-NEXT:    addq $40, %rsp
3817; CHECK-NEXT:    .cfi_def_cfa_offset 8
3818; CHECK-NEXT:    retq
3819;
3820; AVX-LABEL: constrained_vector_maxnum_v4f64:
3821; AVX:       # %bb.0: # %entry
3822; AVX-NEXT:    subq $40, %rsp
3823; AVX-NEXT:    .cfi_def_cfa_offset 48
3824; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.7E+1,0.0E+0]
3825; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.3E+1,0.0E+0]
3826; AVX-NEXT:    callq fmax@PLT
3827; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
3828; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.6E+1,0.0E+0]
3829; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0]
3830; AVX-NEXT:    callq fmax@PLT
3831; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
3832; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
3833; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
3834; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.5E+1,0.0E+0]
3835; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.1E+1,0.0E+0]
3836; AVX-NEXT:    callq fmax@PLT
3837; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
3838; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.4E+1,0.0E+0]
3839; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.0E+1,0.0E+0]
3840; AVX-NEXT:    callq fmax@PLT
3841; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
3842; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
3843; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
3844; AVX-NEXT:    addq $40, %rsp
3845; AVX-NEXT:    .cfi_def_cfa_offset 8
3846; AVX-NEXT:    retq
; Strict maxnum on <4 x double>: four fmax libcalls. SSE returns the result in
; xmm0/xmm1 pairs built with unpcklpd; AVX merges the two 128-bit halves into
; ymm0 with vinsertf128.
entry:
3848  %max = call <4 x double> @llvm.experimental.constrained.maxnum.v4f64(
3849                                <4 x double> <double 44.0, double 45.0,
3850                                              double 46.0, double 47.0>,
3851                                <4 x double> <double 40.0, double 41.0,
3852                                              double 42.0, double 43.0>,
3853                                metadata !"fpexcept.strict") #0
3854  ret <4 x double> %max
3855}
3856
3857define <1 x float> @constrained_vector_minnum_v1f32() #0 {
3858; CHECK-LABEL: constrained_vector_minnum_v1f32:
3859; CHECK:       # %bb.0: # %entry
3860; CHECK-NEXT:    pushq %rax
3861; CHECK-NEXT:    .cfi_def_cfa_offset 16
3862; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
3863; CHECK-NEXT:    movss {{.*#+}} xmm1 = [4.1E+1,0.0E+0,0.0E+0,0.0E+0]
3864; CHECK-NEXT:    callq fminf@PLT
3865; CHECK-NEXT:    popq %rax
3866; CHECK-NEXT:    .cfi_def_cfa_offset 8
3867; CHECK-NEXT:    retq
3868;
3869; AVX-LABEL: constrained_vector_minnum_v1f32:
3870; AVX:       # %bb.0: # %entry
3871; AVX-NEXT:    pushq %rax
3872; AVX-NEXT:    .cfi_def_cfa_offset 16
3873; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
3874; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [4.1E+1,0.0E+0,0.0E+0,0.0E+0]
3875; AVX-NEXT:    callq fminf@PLT
3876; AVX-NEXT:    popq %rax
3877; AVX-NEXT:    .cfi_def_cfa_offset 8
3878; AVX-NEXT:    retq
; Minnum counterpart of the v1f32 maxnum test above: a single fminf libcall on
; both paths. Only the fpexcept metadata operand is present.
 entry:
3880  %min = call <1 x float> @llvm.experimental.constrained.minnum.v1f32(
3881                               <1 x float> <float 42.0>, <1 x float> <float 41.0>,
3882                               metadata !"fpexcept.strict") #0
3883  ret <1 x float> %min
3884}
3885
3886define <2 x double> @constrained_vector_minnum_v2f64() #0 {
3887; CHECK-LABEL: constrained_vector_minnum_v2f64:
3888; CHECK:       # %bb.0: # %entry
3889; CHECK-NEXT:    subq $24, %rsp
3890; CHECK-NEXT:    .cfi_def_cfa_offset 32
3891; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
3892; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [4.0E+1,0.0E+0]
3893; CHECK-NEXT:    callq fmin@PLT
3894; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
3895; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.3E+1,0.0E+0]
3896; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [4.1E+1,0.0E+0]
3897; CHECK-NEXT:    callq fmin@PLT
3898; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
3899; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
3900; CHECK-NEXT:    addq $24, %rsp
3901; CHECK-NEXT:    .cfi_def_cfa_offset 8
3902; CHECK-NEXT:    retq
3903;
3904; AVX-LABEL: constrained_vector_minnum_v2f64:
3905; AVX:       # %bb.0: # %entry
3906; AVX-NEXT:    subq $24, %rsp
3907; AVX-NEXT:    .cfi_def_cfa_offset 32
3908; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
3909; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.0E+1,0.0E+0]
3910; AVX-NEXT:    callq fmin@PLT
3911; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
3912; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.3E+1,0.0E+0]
3913; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.1E+1,0.0E+0]
3914; AVX-NEXT:    callq fmin@PLT
3915; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
3916; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
3917; AVX-NEXT:    addq $24, %rsp
3918; AVX-NEXT:    .cfi_def_cfa_offset 8
3919; AVX-NEXT:    retq
; Strict minnum of a constant <2 x double>: two fmin libcalls recombined with
; unpcklpd, mirroring the maxnum v2f64 test above.
entry:
3921  %min = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(
3922                                <2 x double> <double 43.0, double 42.0>,
3923                                <2 x double> <double 41.0, double 40.0>,
3924                                metadata !"fpexcept.strict") #0
3925  ret <2 x double> %min
3926}
3927
3928define <3 x float> @constrained_vector_minnum_v3f32() #0 {
3929; CHECK-LABEL: constrained_vector_minnum_v3f32:
3930; CHECK:       # %bb.0: # %entry
3931; CHECK-NEXT:    subq $40, %rsp
3932; CHECK-NEXT:    .cfi_def_cfa_offset 48
3933; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.5E+1,0.0E+0,0.0E+0,0.0E+0]
3934; CHECK-NEXT:    movss {{.*#+}} xmm1 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
3935; CHECK-NEXT:    callq fminf@PLT
3936; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
3937; CHECK-NEXT:    movss {{.*#+}} xmm1 = [4.1E+1,0.0E+0,0.0E+0,0.0E+0]
3938; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
3939; CHECK-NEXT:    callq fminf@PLT
3940; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
3941; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
3942; CHECK-NEXT:    movss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
3943; CHECK-NEXT:    callq fminf@PLT
3944; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
3945; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
3946; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
3947; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
3948; CHECK-NEXT:    movaps %xmm1, %xmm0
3949; CHECK-NEXT:    addq $40, %rsp
3950; CHECK-NEXT:    .cfi_def_cfa_offset 8
3951; CHECK-NEXT:    retq
3952;
3953; AVX-LABEL: constrained_vector_minnum_v3f32:
3954; AVX:       # %bb.0: # %entry
3955; AVX-NEXT:    subq $40, %rsp
3956; AVX-NEXT:    .cfi_def_cfa_offset 48
3957; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.5E+1,0.0E+0,0.0E+0,0.0E+0]
3958; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
3959; AVX-NEXT:    callq fminf@PLT
3960; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
3961; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [4.1E+1,0.0E+0,0.0E+0,0.0E+0]
3962; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
3963; AVX-NEXT:    callq fminf@PLT
3964; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
3965; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
3966; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
3967; AVX-NEXT:    callq fminf@PLT
3968; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
3969; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
3970; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
3971; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
3972; AVX-NEXT:    addq $40, %rsp
3973; AVX-NEXT:    .cfi_def_cfa_offset 8
3974; AVX-NEXT:    retq
; Strict minnum on <3 x float>: three fminf libcalls, then the result vector
; is rebuilt with unpcklps/unpcklpd (SSE) or vinsertps (AVX).
entry:
3976  %min = call <3 x float> @llvm.experimental.constrained.minnum.v3f32(
3977                              <3 x float> <float 43.0, float 44.0, float 45.0>,
3978                              <3 x float> <float 41.0, float 42.0, float 43.0>,
3979                              metadata !"fpexcept.strict") #0
3980  ret <3 x float> %min
3981}
3982
3983define <3 x double> @constrained_vector_min_v3f64() #0 {
3984; CHECK-LABEL: constrained_vector_min_v3f64:
3985; CHECK:       # %bb.0: # %entry
3986; CHECK-NEXT:    subq $24, %rsp
3987; CHECK-NEXT:    .cfi_def_cfa_offset 32
3988; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.4E+1,0.0E+0]
3989; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [4.1E+1,0.0E+0]
3990; CHECK-NEXT:    callq fmin@PLT
3991; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
3992; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.3E+1,0.0E+0]
3993; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [4.0E+1,0.0E+0]
3994; CHECK-NEXT:    callq fmin@PLT
3995; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
3996; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.5E+1,0.0E+0]
3997; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0]
3998; CHECK-NEXT:    callq fmin@PLT
3999; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
4000; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
4001; CHECK-NEXT:    wait
4002; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
4003; CHECK-NEXT:    # xmm0 = mem[0],zero
4004; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
4005; CHECK-NEXT:    # xmm1 = mem[0],zero
4006; CHECK-NEXT:    addq $24, %rsp
4007; CHECK-NEXT:    .cfi_def_cfa_offset 8
4008; CHECK-NEXT:    retq
4009;
4010; AVX-LABEL: constrained_vector_min_v3f64:
4011; AVX:       # %bb.0: # %entry
4012; AVX-NEXT:    subq $40, %rsp
4013; AVX-NEXT:    .cfi_def_cfa_offset 48
4014; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.4E+1,0.0E+0]
4015; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.1E+1,0.0E+0]
4016; AVX-NEXT:    callq fmin@PLT
4017; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
4018; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.3E+1,0.0E+0]
4019; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.0E+1,0.0E+0]
4020; AVX-NEXT:    callq fmin@PLT
4021; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
4022; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
4023; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
4024; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.5E+1,0.0E+0]
4025; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0]
4026; AVX-NEXT:    vzeroupper
4027; AVX-NEXT:    callq fmin@PLT
4028; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
4029; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
4030; AVX-NEXT:    addq $40, %rsp
4031; AVX-NEXT:    .cfi_def_cfa_offset 8
4032; AVX-NEXT:    retq
; Minnum counterpart of the max_v3f64 test: three fmin libcalls; the SSE path
; returns the third element via the x87 stack, the AVX path builds ymm0 with
; vinsertf128 and emits vzeroupper before the last libcall.
entry:
4034 %min = call <3 x double> @llvm.experimental.constrained.minnum.v3f64(
4035                          <3 x double> <double 43.0, double 44.0, double 45.0>,
4036                          <3 x double> <double 40.0, double 41.0, double 42.0>,
4037                          metadata !"fpexcept.strict") #0
4038  ret <3 x double> %min
4039}
4040
4041define <4 x double> @constrained_vector_minnum_v4f64() #0 {
4042; CHECK-LABEL: constrained_vector_minnum_v4f64:
4043; CHECK:       # %bb.0: # %entry
4044; CHECK-NEXT:    subq $40, %rsp
4045; CHECK-NEXT:    .cfi_def_cfa_offset 48
4046; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.5E+1,0.0E+0]
4047; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [4.1E+1,0.0E+0]
4048; CHECK-NEXT:    callq fmin@PLT
4049; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
4050; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.4E+1,0.0E+0]
4051; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [4.0E+1,0.0E+0]
4052; CHECK-NEXT:    callq fmin@PLT
4053; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
4054; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
4055; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
4056; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.7E+1,0.0E+0]
4057; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [4.3E+1,0.0E+0]
4058; CHECK-NEXT:    callq fmin@PLT
4059; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
4060; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.6E+1,0.0E+0]
4061; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0]
4062; CHECK-NEXT:    callq fmin@PLT
4063; CHECK-NEXT:    movaps %xmm0, %xmm1
4064; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
4065; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
4066; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
4067; CHECK-NEXT:    addq $40, %rsp
4068; CHECK-NEXT:    .cfi_def_cfa_offset 8
4069; CHECK-NEXT:    retq
4070;
4071; AVX-LABEL: constrained_vector_minnum_v4f64:
4072; AVX:       # %bb.0: # %entry
4073; AVX-NEXT:    subq $40, %rsp
4074; AVX-NEXT:    .cfi_def_cfa_offset 48
4075; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.7E+1,0.0E+0]
4076; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.3E+1,0.0E+0]
4077; AVX-NEXT:    callq fmin@PLT
4078; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
4079; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.6E+1,0.0E+0]
4080; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0]
4081; AVX-NEXT:    callq fmin@PLT
4082; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
4083; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
4084; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
4085; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.5E+1,0.0E+0]
4086; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.1E+1,0.0E+0]
4087; AVX-NEXT:    callq fmin@PLT
4088; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
4089; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.4E+1,0.0E+0]
4090; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.0E+1,0.0E+0]
4091; AVX-NEXT:    callq fmin@PLT
4092; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
4093; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
4094; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
4095; AVX-NEXT:    addq $40, %rsp
4096; AVX-NEXT:    .cfi_def_cfa_offset 8
4097; AVX-NEXT:    retq
; Strict minnum on <4 x double>: four fmin libcalls, recombined into xmm0/xmm1
; (SSE) or a single ymm0 via vinsertf128 (AVX) — mirror of maxnum_v4f64.
entry:
4099  %min = call <4 x double> @llvm.experimental.constrained.minnum.v4f64(
4100                                <4 x double> <double 44.0, double 45.0,
4101                                              double 46.0, double 47.0>,
4102                                <4 x double> <double 40.0, double 41.0,
4103                                              double 42.0, double 43.0>,
4104                                metadata !"fpexcept.strict") #0
4105  ret <4 x double> %min
4106}
4107
define <1 x i32> @constrained_vector_fptosi_v1i32_v1f32() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v1i32_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fptosi_v1i32_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; AVX-NEXT:    retq
entry:
  ; Strict (exception-preserving) float->i32 conversion of a <1 x float>
  ; constant; lowers to a single scalar cvttss2si.  The CHECK lines above are
  ; autogenerated by utils/update_llc_test_checks.py -- regenerate, do not
  ; hand-edit.
  %result = call <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f32(
                               <1 x float><float 42.0>,
                               metadata !"fpexcept.strict") #0
  ret <1 x i32> %result
}
4124
define <2 x i32> @constrained_vector_fptosi_v2i32_v2f32() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v2i32_v2f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttps2dq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fptosi_v2i32_v2f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvttps2dq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX-NEXT:    retq
entry:
  ; Strict <2 x float> -> <2 x i32>; both SSE and AVX use the packed
  ; cvttps2dq form.  CHECK lines are autogenerated (update_llc_test_checks.py).
  %result = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32(
                                <2 x float><float 42.0, float 43.0>,
                                metadata !"fpexcept.strict") #0
  ret <2 x i32> %result
}
4141
define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v3i32_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; CHECK-NEXT:    movd %eax, %xmm1
; CHECK-NEXT:    cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; CHECK-NEXT:    movd %eax, %xmm0
; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; CHECK-NEXT:    movd %eax, %xmm1
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fptosi_v3i32_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; AVX-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  ; Non-power-of-two width: the strict conversion scalarizes to three
  ; cvttss2si ops and the lanes are reassembled with unpck (SSE) or
  ; vpinsrd (AVX).  CHECK lines are autogenerated.
  %result = call <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f32(
                                <3 x float><float 42.0, float 43.0,
                                            float 44.0>,
                                metadata !"fpexcept.strict") #0
  ret <3 x i32> %result
}
4171
define <4 x i32> @constrained_vector_fptosi_v4i32_v4f32() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v4i32_v4f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttps2dq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fptosi_v4i32_v4f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvttps2dq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX-NEXT:    retq
entry:
  ; Full-width <4 x float> -> <4 x i32> maps directly to one packed
  ; cvttps2dq.  CHECK lines are autogenerated.
  %result = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(
                                <4 x float><float 42.0, float 43.0,
                                            float 44.0, float 45.0>,
                                metadata !"fpexcept.strict") #0
  ret <4 x i32> %result
}
4189
define <1 x i64> @constrained_vector_fptosi_v1i64_v1f32() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v1i64_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fptosi_v1i64_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX-NEXT:    retq
entry:
  ; Strict float -> i64: single cvttss2si with a 64-bit destination.
  ; CHECK lines are autogenerated.
  %result = call <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f32(
                               <1 x float><float 42.0>,
                               metadata !"fpexcept.strict") #0
  ret <1 x i64> %result
}
4206
define <2 x i64> @constrained_vector_fptosi_v2i64_v2f32() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v2i64_v2f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movq %rax, %xmm1
; CHECK-NEXT:    cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movq %rax, %xmm0
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptosi_v2i64_v2f32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm0
; AVX1-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    retq
;
; AVX512F-LABEL: constrained_vector_fptosi_v2i64_v2f32:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm0
; AVX512F-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm1
; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT:    retq
;
; AVX512DQ-LABEL: constrained_vector_fptosi_v2i64_v2f32:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vcvttps2qq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
entry:
  ; float -> i64 lanes: SSE/AVX1/AVX512F scalarize (cvttss2si + rebuild);
  ; only AVX512DQ has the packed vcvttps2qq form.  CHECK lines are
  ; autogenerated.
  %result = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(
                                <2 x float><float 42.0, float 43.0>,
                                metadata !"fpexcept.strict") #0
  ret <2 x i64> %result
}
4247
define <3 x i64> @constrained_vector_fptosi_v3i64_v3f32() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v3i64_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx
; CHECK-NEXT:    cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rdx
; CHECK-NEXT:    cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptosi_v3i64_v3f32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm0
; AVX1-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_fptosi_v3i64_v3f32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512-NEXT:    vmovq %rax, %xmm0
; AVX512-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
entry:
  ; <3 x i64> result: the SSE ABI returns the three lanes in rax/rdx/rcx,
  ; while AVX builds a ymm register via unpck + insert.  CHECK lines are
  ; autogenerated.
  %result = call <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f32(
                                <3 x float><float 42.0, float 43.0,
                                            float 44.0>,
                                metadata !"fpexcept.strict") #0
  ret <3 x i64> %result
}
4286
define <4 x i64> @constrained_vector_fptosi_v4i64_v4f32() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v4i64_v4f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movq %rax, %xmm1
; CHECK-NEXT:    cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movq %rax, %xmm0
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movq %rax, %xmm2
; CHECK-NEXT:    cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movq %rax, %xmm1
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptosi_v4i64_v4f32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm0
; AVX1-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm2
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX512F-LABEL: constrained_vector_fptosi_v4i64_v4f32:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm0
; AVX512F-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm1
; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm1
; AVX512F-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm2
; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512DQ-LABEL: constrained_vector_fptosi_v4i64_v4f32:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vmovaps {{.*#+}} xmm0 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1]
; AVX512DQ-NEXT:    vcvttps2qq %ymm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    retq
entry:
  ; Four float -> i64 lanes: fully scalarized everywhere except AVX512DQ,
  ; which uses one packed vcvttps2qq.  CHECK lines are autogenerated.
  %result = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(
                                <4 x float><float 42.0, float 43.0,
                                            float 44.0, float 45.0>,
                                metadata !"fpexcept.strict") #0
  ret <4 x i64> %result
}
4345
define <1 x i32> @constrained_vector_fptosi_v1i32_v1f64() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v1i32_v1f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fptosi_v1i32_v1f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; AVX-NEXT:    retq
entry:
  ; Double-precision variant of the v1i32 test: single cvttsd2si.
  ; CHECK lines are autogenerated.
  %result = call <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f64(
                               <1 x double><double 42.1>,
                               metadata !"fpexcept.strict") #0
  ret <1 x i32> %result
}
4362
4363
define <2 x i32> @constrained_vector_fptosi_v2i32_v2f64() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v2i32_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttpd2dq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fptosi_v2i32_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvttpd2dqx {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX-NEXT:    retq
entry:
  ; Packed <2 x double> -> <2 x i32> via cvttpd2dq (the AVX 'x' suffix
  ; disambiguates the 128-bit memory form).  CHECK lines are autogenerated.
  %result = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(
                                <2 x double><double 42.1, double 42.2>,
                                metadata !"fpexcept.strict") #0
  ret <2 x i32> %result
}
4380
define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v3i32_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; CHECK-NEXT:    movd %eax, %xmm1
; CHECK-NEXT:    cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; CHECK-NEXT:    movd %eax, %xmm0
; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; CHECK-NEXT:    movd %eax, %xmm1
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fptosi_v3i32_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; AVX-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  ; Odd-width double -> i32: three scalar cvttsd2si conversions with lane
  ; reassembly, mirroring the v3f32 case.  CHECK lines are autogenerated.
  %result = call <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f64(
                                <3 x double><double 42.1, double 42.2,
                                             double 42.3>,
                                metadata !"fpexcept.strict") #0
  ret <3 x i32> %result
}
4410
define <4 x i32> @constrained_vector_fptosi_v4i32_v4f64() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v4i32_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttpd2dq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NEXT:    cvttpd2dq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fptosi_v4i32_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvttpd2dqy {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX-NEXT:    retq
entry:
  ; <4 x double> -> <4 x i32>: SSE splits into two 128-bit cvttpd2dq halves,
  ; AVX does one 256-bit vcvttpd2dqy.  CHECK lines are autogenerated.
  %result = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f64(
                                <4 x double><double 42.1, double 42.2,
                                             double 42.3, double 42.4>,
                                metadata !"fpexcept.strict") #0
  ret <4 x i32> %result
}
4430
define <1 x i64> @constrained_vector_fptosi_v1i64_v1f64() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v1i64_v1f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_fptosi_v1i64_v1f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX-NEXT:    retq
entry:
  ; Strict double -> i64: single cvttsd2si with a 64-bit destination.
  ; CHECK lines are autogenerated.
  %result = call <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f64(
                               <1 x double><double 42.1>,
                               metadata !"fpexcept.strict") #0
  ret <1 x i64> %result
}
4447
define <2 x i64> @constrained_vector_fptosi_v2i64_v2f64() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v2i64_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movq %rax, %xmm1
; CHECK-NEXT:    cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movq %rax, %xmm0
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptosi_v2i64_v2f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm0
; AVX1-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    retq
;
; AVX512F-LABEL: constrained_vector_fptosi_v2i64_v2f64:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm0
; AVX512F-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm1
; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT:    retq
;
; AVX512DQ-LABEL: constrained_vector_fptosi_v2i64_v2f64:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vmovaps {{.*#+}} xmm0 = [4.2100000000000001E+1,4.2200000000000003E+1]
; AVX512DQ-NEXT:    vcvttpd2qq %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
entry:
  ; double -> i64 lanes: scalarized except on AVX512DQ, which has the
  ; packed vcvttpd2qq.  CHECK lines are autogenerated.
  %result = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(
                                <2 x double><double 42.1, double 42.2>,
                                metadata !"fpexcept.strict") #0
  ret <2 x i64> %result
}
4489
define <3 x i64> @constrained_vector_fptosi_v3i64_v3f64() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v3i64_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx
; CHECK-NEXT:    cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rdx
; CHECK-NEXT:    cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptosi_v3i64_v3f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm0
; AVX1-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_fptosi_v3i64_v3f64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512-NEXT:    vmovq %rax, %xmm0
; AVX512-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
entry:
  ; <3 x i64> from doubles: SSE returns lanes in rax/rdx/rcx, AVX assembles
  ; a ymm register.  CHECK lines are autogenerated.
  %result = call <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f64(
                                <3 x double><double 42.1, double 42.2,
                                             double 42.3>,
                                metadata !"fpexcept.strict") #0
  ret <3 x i64> %result
}
4528
define <4 x i64> @constrained_vector_fptosi_v4i64_v4f64() #0 {
; CHECK-LABEL: constrained_vector_fptosi_v4i64_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movq %rax, %xmm1
; CHECK-NEXT:    cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movq %rax, %xmm0
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movq %rax, %xmm2
; CHECK-NEXT:    cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movq %rax, %xmm1
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptosi_v4i64_v4f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm0
; AVX1-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    vmovq %rax, %xmm2
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX512F-LABEL: constrained_vector_fptosi_v4i64_v4f64:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm0
; AVX512F-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm1
; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm1
; AVX512F-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm2
; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512DQ-LABEL: constrained_vector_fptosi_v4i64_v4f64:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vmovaps {{.*#+}} ymm0 = [4.2100000000000001E+1,4.2200000000000003E+1,4.2299999999999997E+1,4.2399999999999999E+1]
; AVX512DQ-NEXT:    vcvttpd2qq %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    retq
entry:
  ; Four double -> i64 lanes: scalarized on all targets except AVX512DQ's
  ; single packed vcvttpd2qq.  CHECK lines are autogenerated.
  %result = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64(
                                <4 x double><double 42.1, double 42.2,
                                             double 42.3, double 42.4>,
                                metadata !"fpexcept.strict") #0
  ret <4 x i64> %result
}
4587
define <1 x i32> @constrained_vector_fptoui_v1i32_v1f32() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v1i32_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptoui_v1i32_v1f32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    # kill: def $eax killed $eax killed $rax
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_fptoui_v1i32_v1f32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; AVX512-NEXT:    retq
entry:
  ; Unsigned conversion: pre-AVX512 widens to a 64-bit signed cvttss2si and
  ; truncates; AVX512 has the native vcvttss2usi.  CHECK lines are
  ; autogenerated.
  %result = call <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f32(
                               <1 x float><float 42.0>,
                               metadata !"fpexcept.strict") #0
  ret <1 x i32> %result
}
4611
define <2 x i32> @constrained_vector_fptoui_v2i32_v2f32() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v2i32_v2f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movd %eax, %xmm1
; CHECK-NEXT:    cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movd %eax, %xmm0
; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptoui_v2i32_v2f32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx
; AVX1-NEXT:    vmovd %ecx, %xmm0
; AVX1-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_fptoui_v2i32_v2f32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,4.3E+1,0.0E+0,0.0E+0]
; AVX512-NEXT:    vcvttps2udq %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  ; Unsigned <2 x float> -> <2 x i32>: scalarized via 64-bit signed converts
  ; pre-AVX512; AVX512 uses packed vcvttps2udq (widened to zmm).  CHECK
  ; lines are autogenerated.
  %result = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32(
                                <2 x float><float 42.0, float 43.0>,
                                metadata !"fpexcept.strict") #0
  ret <2 x i32> %result
}
4643
define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v3i32_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movd %eax, %xmm1
; CHECK-NEXT:    cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movd %eax, %xmm0
; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movd %eax, %xmm1
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptoui_v3i32_v3f32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx
; AVX1-NEXT:    vmovd %ecx, %xmm0
; AVX1-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; AVX1-NEXT:    vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_fptoui_v3i32_v3f32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; AVX512-NEXT:    vmovd %eax, %xmm0
; AVX512-NEXT:    vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; AVX512-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; AVX512-NEXT:    vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; AVX512-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; AVX512-NEXT:    retq
entry:
  ; Odd-width unsigned case: always scalarized; AVX512 uses the native
  ; per-lane vcvttss2usi.  CHECK lines are autogenerated.
  %result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f32(
                                <3 x float><float 42.0, float 43.0,
                                            float 44.0>,
                                metadata !"fpexcept.strict") #0
  ret <3 x i32> %result
}
4683
define <4 x i32> @constrained_vector_fptoui_v4i32_v4f32() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v4i32_v4f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
; CHECK-NEXT:    movaps {{.*#+}} xmm1 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1]
; CHECK-NEXT:    movaps %xmm1, %xmm2
; CHECK-NEXT:    cmpltps %xmm0, %xmm2
; CHECK-NEXT:    movaps %xmm2, %xmm3
; CHECK-NEXT:    andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; CHECK-NEXT:    andnps %xmm0, %xmm2
; CHECK-NEXT:    subps %xmm2, %xmm1
; CHECK-NEXT:    cvttps2dq %xmm1, %xmm0
; CHECK-NEXT:    xorps %xmm3, %xmm0
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptoui_v4i32_v4f32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm0 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
; AVX1-NEXT:    vmovaps {{.*#+}} xmm1 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1]
; AVX1-NEXT:    vcmpltps %xmm0, %xmm1, %xmm2
; AVX1-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; AVX1-NEXT:    vblendvps %xmm2, %xmm3, %xmm4, %xmm4
; AVX1-NEXT:    vblendvps %xmm2, %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vsubps %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX1-NEXT:    vxorps %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_fptoui_v4i32_v4f32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1]
; AVX512-NEXT:    vcvttps2udq %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  ; Packed unsigned conversion: without AVX512, values >= 2^31 are handled
  ; by a compare-against-2^31 / subtract / signed-convert / xor-high-bit
  ; sequence; AVX512 uses vcvttps2udq directly.  CHECK lines are
  ; autogenerated.
  %result = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(
                                <4 x float><float 42.0, float 43.0,
                                            float 44.0, float 45.0>,
                                metadata !"fpexcept.strict") #0
  ret <4 x i32> %result
}
4727
define <1 x i64> @constrained_vector_fptoui_v1i64_v1f32() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v1i64_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    movss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    comiss %xmm0, %xmm2
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    ja .LBB121_2
; CHECK-NEXT:  # %bb.1: # %entry
; CHECK-NEXT:    movaps %xmm2, %xmm1
; CHECK-NEXT:  .LBB121_2: # %entry
; CHECK-NEXT:    subss %xmm1, %xmm0
; CHECK-NEXT:    cvttss2si %xmm0, %rcx
; CHECK-NEXT:    setbe %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    shlq $63, %rax
; CHECK-NEXT:    xorq %rcx, %rax
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptoui_v1i64_v1f32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT:    vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT:    vcomiss %xmm0, %xmm1
; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    ja .LBB121_2
; AVX1-NEXT:  # %bb.1: # %entry
; AVX1-NEXT:    vmovaps %xmm1, %xmm2
; AVX1-NEXT:  .LBB121_2: # %entry
; AVX1-NEXT:    vsubss %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vcvttss2si %xmm0, %rcx
; AVX1-NEXT:    setbe %al
; AVX1-NEXT:    movzbl %al, %eax
; AVX1-NEXT:    shlq $63, %rax
; AVX1-NEXT:    xorq %rcx, %rax
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_fptoui_v1i64_v1f32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512-NEXT:    retq
entry:
  ; float -> unsigned i64: pre-AVX512 compares against 2^63, conditionally
  ; subtracts it, does a signed cvttss2si, then flips the sign bit back in;
  ; AVX512 uses the native vcvttss2usi.  CHECK lines are autogenerated.
  %result = call <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f32(
                               <1 x float><float 42.0>,
                               metadata !"fpexcept.strict") #0
  ret <1 x i64> %result
}
4775
4776define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32() #0 {
; Strict (exception-preserving) fptoui of <2 x float> [42.0, 43.0] -> <2 x i64>.
; Without AVX512 there is no unsigned 64-bit cvt, so each lane compares against
; 2^63 (9.22337203E+18), conditionally subtracts it, converts with the signed
; cvttss2si, and XORs the sign bit back in via setbe/shlq $63.
; CHECK-LABEL: constrained_vector_fptoui_v2i64_v2f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    comiss %xmm2, %xmm1
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    xorps %xmm3, %xmm3
; CHECK-NEXT:    ja .LBB122_2
; CHECK-NEXT:  # %bb.1: # %entry
; CHECK-NEXT:    movaps %xmm1, %xmm3
; CHECK-NEXT:  .LBB122_2: # %entry
; CHECK-NEXT:    subss %xmm3, %xmm2
; CHECK-NEXT:    cvttss2si %xmm2, %rax
; CHECK-NEXT:    setbe %cl
; CHECK-NEXT:    movzbl %cl, %ecx
; CHECK-NEXT:    shlq $63, %rcx
; CHECK-NEXT:    xorq %rax, %rcx
; CHECK-NEXT:    movq %rcx, %xmm2
; CHECK-NEXT:    movss {{.*#+}} xmm3 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    comiss %xmm3, %xmm1
; CHECK-NEXT:    ja .LBB122_4
; CHECK-NEXT:  # %bb.3: # %entry
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:  .LBB122_4: # %entry
; CHECK-NEXT:    subss %xmm0, %xmm3
; CHECK-NEXT:    cvttss2si %xmm3, %rax
; CHECK-NEXT:    setbe %cl
; CHECK-NEXT:    movzbl %cl, %ecx
; CHECK-NEXT:    shlq $63, %rcx
; CHECK-NEXT:    xorq %rax, %rcx
; CHECK-NEXT:    movq %rcx, %xmm0
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptoui_v2i64_v2f32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT:    vmovss {{.*#+}} xmm0 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT:    vcomiss %xmm2, %xmm0
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    ja .LBB122_2
; AVX1-NEXT:  # %bb.1: # %entry
; AVX1-NEXT:    vmovaps %xmm0, %xmm3
; AVX1-NEXT:  .LBB122_2: # %entry
; AVX1-NEXT:    vsubss %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vcvttss2si %xmm2, %rax
; AVX1-NEXT:    setbe %cl
; AVX1-NEXT:    movzbl %cl, %ecx
; AVX1-NEXT:    shlq $63, %rcx
; AVX1-NEXT:    xorq %rax, %rcx
; AVX1-NEXT:    vmovq %rcx, %xmm2
; AVX1-NEXT:    vmovss {{.*#+}} xmm3 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT:    vcomiss %xmm3, %xmm0
; AVX1-NEXT:    ja .LBB122_4
; AVX1-NEXT:  # %bb.3: # %entry
; AVX1-NEXT:    vmovaps %xmm0, %xmm1
; AVX1-NEXT:  .LBB122_4: # %entry
; AVX1-NEXT:    vsubss %xmm1, %xmm3, %xmm0
; AVX1-NEXT:    vcvttss2si %xmm0, %rax
; AVX1-NEXT:    setbe %cl
; AVX1-NEXT:    movzbl %cl, %ecx
; AVX1-NEXT:    shlq $63, %rcx
; AVX1-NEXT:    xorq %rax, %rcx
; AVX1-NEXT:    vmovq %rcx, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT:    retq
;
; AVX512F-LABEL: constrained_vector_fptoui_v2i64_v2f32:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm0
; AVX512F-NEXT:    vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm1
; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT:    retq
;
; AVX512DQ-LABEL: constrained_vector_fptoui_v2i64_v2f32:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vcvttps2uqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
entry:
  %result = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(
                                <2 x float><float 42.0, float 43.0>,
                                metadata !"fpexcept.strict") #0
  ret <2 x i64> %result
}
4866
4867define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 {
; Strict fptoui of the odd-width <3 x float> -> <3 x i64>. SSE returns the three
; lanes in rax/rdx/rcx (no vector return); AVX widens to a ymm via
; vinsertf128/vinserti128. Same per-lane 2^63 compare/subtract/XOR trick as the
; other pre-AVX512 unsigned conversions.
; CHECK-LABEL: constrained_vector_fptoui_v3i64_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movss {{.*#+}} xmm2 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    comiss %xmm2, %xmm1
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    xorps %xmm3, %xmm3
; CHECK-NEXT:    ja .LBB123_2
; CHECK-NEXT:  # %bb.1: # %entry
; CHECK-NEXT:    movaps %xmm1, %xmm3
; CHECK-NEXT:  .LBB123_2: # %entry
; CHECK-NEXT:    subss %xmm3, %xmm2
; CHECK-NEXT:    cvttss2si %xmm2, %rcx
; CHECK-NEXT:    setbe %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    shlq $63, %rax
; CHECK-NEXT:    xorq %rcx, %rax
; CHECK-NEXT:    movss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    comiss %xmm2, %xmm1
; CHECK-NEXT:    xorps %xmm3, %xmm3
; CHECK-NEXT:    ja .LBB123_4
; CHECK-NEXT:  # %bb.3: # %entry
; CHECK-NEXT:    movaps %xmm1, %xmm3
; CHECK-NEXT:  .LBB123_4: # %entry
; CHECK-NEXT:    subss %xmm3, %xmm2
; CHECK-NEXT:    cvttss2si %xmm2, %rcx
; CHECK-NEXT:    setbe %dl
; CHECK-NEXT:    movzbl %dl, %edx
; CHECK-NEXT:    shlq $63, %rdx
; CHECK-NEXT:    xorq %rcx, %rdx
; CHECK-NEXT:    movss {{.*#+}} xmm2 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    comiss %xmm2, %xmm1
; CHECK-NEXT:    ja .LBB123_6
; CHECK-NEXT:  # %bb.5: # %entry
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:  .LBB123_6: # %entry
; CHECK-NEXT:    subss %xmm0, %xmm2
; CHECK-NEXT:    cvttss2si %xmm2, %rsi
; CHECK-NEXT:    setbe %cl
; CHECK-NEXT:    movzbl %cl, %ecx
; CHECK-NEXT:    shlq $63, %rcx
; CHECK-NEXT:    xorq %rsi, %rcx
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptoui_v3i64_v3f32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT:    vmovss {{.*#+}} xmm0 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT:    vcomiss %xmm2, %xmm0
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    ja .LBB123_2
; AVX1-NEXT:  # %bb.1: # %entry
; AVX1-NEXT:    vmovaps %xmm0, %xmm3
; AVX1-NEXT:  .LBB123_2: # %entry
; AVX1-NEXT:    vsubss %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vcvttss2si %xmm2, %rax
; AVX1-NEXT:    setbe %cl
; AVX1-NEXT:    movzbl %cl, %ecx
; AVX1-NEXT:    shlq $63, %rcx
; AVX1-NEXT:    xorq %rax, %rcx
; AVX1-NEXT:    vmovq %rcx, %xmm2
; AVX1-NEXT:    vmovss {{.*#+}} xmm3 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT:    vcomiss %xmm3, %xmm0
; AVX1-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    ja .LBB123_4
; AVX1-NEXT:  # %bb.3: # %entry
; AVX1-NEXT:    vmovaps %xmm0, %xmm4
; AVX1-NEXT:  .LBB123_4: # %entry
; AVX1-NEXT:    vsubss %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vcvttss2si %xmm3, %rax
; AVX1-NEXT:    setbe %cl
; AVX1-NEXT:    movzbl %cl, %ecx
; AVX1-NEXT:    shlq $63, %rcx
; AVX1-NEXT:    xorq %rax, %rcx
; AVX1-NEXT:    vmovq %rcx, %xmm3
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-NEXT:    vmovss {{.*#+}} xmm3 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT:    vcomiss %xmm3, %xmm0
; AVX1-NEXT:    ja .LBB123_6
; AVX1-NEXT:  # %bb.5: # %entry
; AVX1-NEXT:    vmovaps %xmm0, %xmm1
; AVX1-NEXT:  .LBB123_6: # %entry
; AVX1-NEXT:    vsubss %xmm1, %xmm3, %xmm0
; AVX1-NEXT:    vcvttss2si %xmm0, %rax
; AVX1-NEXT:    setbe %cl
; AVX1-NEXT:    movzbl %cl, %ecx
; AVX1-NEXT:    shlq $63, %rcx
; AVX1-NEXT:    xorq %rax, %rcx
; AVX1-NEXT:    vmovq %rcx, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_fptoui_v3i64_v3f32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512-NEXT:    vmovq %rax, %xmm0
; AVX512-NEXT:    vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512-NEXT:    vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
entry:
  %result = call <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f32(
                                <3 x float><float 42.0, float 43.0,
                                            float 44.0>,
                                metadata !"fpexcept.strict") #0
  ret <3 x i64> %result
}
4979
4980define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 {
; Strict fptoui of <4 x float> [42..45] -> <4 x i64>. SSE/AVX1 scalarize all
; four lanes with the 2^63 compare/subtract/sign-bit-XOR sequence and rebuild
; the vector with punpcklqdq (+ vinsertf128 for the ymm result); AVX512DQ can
; use a single vcvttps2uqq on a constant-pool vector.
; CHECK-LABEL: constrained_vector_fptoui_v4i64_v4f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    movss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    comiss %xmm0, %xmm2
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    xorps %xmm3, %xmm3
; CHECK-NEXT:    ja .LBB124_2
; CHECK-NEXT:  # %bb.1: # %entry
; CHECK-NEXT:    movaps %xmm2, %xmm3
; CHECK-NEXT:  .LBB124_2: # %entry
; CHECK-NEXT:    subss %xmm3, %xmm0
; CHECK-NEXT:    cvttss2si %xmm0, %rcx
; CHECK-NEXT:    setbe %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    shlq $63, %rax
; CHECK-NEXT:    xorq %rcx, %rax
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    comiss %xmm0, %xmm2
; CHECK-NEXT:    xorps %xmm4, %xmm4
; CHECK-NEXT:    ja .LBB124_4
; CHECK-NEXT:  # %bb.3: # %entry
; CHECK-NEXT:    movaps %xmm2, %xmm4
; CHECK-NEXT:  .LBB124_4: # %entry
; CHECK-NEXT:    movq %rax, %xmm3
; CHECK-NEXT:    subss %xmm4, %xmm0
; CHECK-NEXT:    cvttss2si %xmm0, %rax
; CHECK-NEXT:    setbe %cl
; CHECK-NEXT:    movzbl %cl, %ecx
; CHECK-NEXT:    shlq $63, %rcx
; CHECK-NEXT:    xorq %rax, %rcx
; CHECK-NEXT:    movq %rcx, %xmm0
; CHECK-NEXT:    movss {{.*#+}} xmm4 = [4.5E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    comiss %xmm4, %xmm2
; CHECK-NEXT:    xorps %xmm5, %xmm5
; CHECK-NEXT:    ja .LBB124_6
; CHECK-NEXT:  # %bb.5: # %entry
; CHECK-NEXT:    movaps %xmm2, %xmm5
; CHECK-NEXT:  .LBB124_6: # %entry
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; CHECK-NEXT:    subss %xmm5, %xmm4
; CHECK-NEXT:    cvttss2si %xmm4, %rax
; CHECK-NEXT:    setbe %cl
; CHECK-NEXT:    movzbl %cl, %ecx
; CHECK-NEXT:    shlq $63, %rcx
; CHECK-NEXT:    xorq %rax, %rcx
; CHECK-NEXT:    movq %rcx, %xmm3
; CHECK-NEXT:    movss {{.*#+}} xmm4 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    comiss %xmm4, %xmm2
; CHECK-NEXT:    ja .LBB124_8
; CHECK-NEXT:  # %bb.7: # %entry
; CHECK-NEXT:    movaps %xmm2, %xmm1
; CHECK-NEXT:  .LBB124_8: # %entry
; CHECK-NEXT:    subss %xmm1, %xmm4
; CHECK-NEXT:    cvttss2si %xmm4, %rax
; CHECK-NEXT:    setbe %cl
; CHECK-NEXT:    movzbl %cl, %ecx
; CHECK-NEXT:    shlq $63, %rcx
; CHECK-NEXT:    xorq %rax, %rcx
; CHECK-NEXT:    movq %rcx, %xmm1
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptoui_v4i64_v4f32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovss {{.*#+}} xmm2 = [4.5E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT:    vmovss {{.*#+}} xmm0 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT:    vcomiss %xmm2, %xmm0
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    ja .LBB124_2
; AVX1-NEXT:  # %bb.1: # %entry
; AVX1-NEXT:    vmovaps %xmm0, %xmm3
; AVX1-NEXT:  .LBB124_2: # %entry
; AVX1-NEXT:    vsubss %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vcvttss2si %xmm2, %rcx
; AVX1-NEXT:    setbe %al
; AVX1-NEXT:    movzbl %al, %eax
; AVX1-NEXT:    shlq $63, %rax
; AVX1-NEXT:    xorq %rcx, %rax
; AVX1-NEXT:    vmovss {{.*#+}} xmm3 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT:    vcomiss %xmm3, %xmm0
; AVX1-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    ja .LBB124_4
; AVX1-NEXT:  # %bb.3: # %entry
; AVX1-NEXT:    vmovaps %xmm0, %xmm4
; AVX1-NEXT:  .LBB124_4: # %entry
; AVX1-NEXT:    vmovq %rax, %xmm2
; AVX1-NEXT:    vsubss %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vcvttss2si %xmm3, %rax
; AVX1-NEXT:    setbe %cl
; AVX1-NEXT:    movzbl %cl, %ecx
; AVX1-NEXT:    shlq $63, %rcx
; AVX1-NEXT:    xorq %rax, %rcx
; AVX1-NEXT:    vmovq %rcx, %xmm3
; AVX1-NEXT:    vmovss {{.*#+}} xmm4 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT:    vcomiss %xmm4, %xmm0
; AVX1-NEXT:    vxorps %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    ja .LBB124_6
; AVX1-NEXT:  # %bb.5: # %entry
; AVX1-NEXT:    vmovaps %xmm0, %xmm5
; AVX1-NEXT:  .LBB124_6: # %entry
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-NEXT:    vsubss %xmm5, %xmm4, %xmm3
; AVX1-NEXT:    vcvttss2si %xmm3, %rax
; AVX1-NEXT:    setbe %cl
; AVX1-NEXT:    movzbl %cl, %ecx
; AVX1-NEXT:    shlq $63, %rcx
; AVX1-NEXT:    xorq %rax, %rcx
; AVX1-NEXT:    vmovq %rcx, %xmm3
; AVX1-NEXT:    vmovss {{.*#+}} xmm4 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT:    vcomiss %xmm4, %xmm0
; AVX1-NEXT:    ja .LBB124_8
; AVX1-NEXT:  # %bb.7: # %entry
; AVX1-NEXT:    vmovaps %xmm0, %xmm1
; AVX1-NEXT:  .LBB124_8: # %entry
; AVX1-NEXT:    vsubss %xmm1, %xmm4, %xmm0
; AVX1-NEXT:    vcvttss2si %xmm0, %rax
; AVX1-NEXT:    setbe %cl
; AVX1-NEXT:    movzbl %cl, %ecx
; AVX1-NEXT:    shlq $63, %rcx
; AVX1-NEXT:    xorq %rax, %rcx
; AVX1-NEXT:    vmovq %rcx, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX512F-LABEL: constrained_vector_fptoui_v4i64_v4f32:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm0
; AVX512F-NEXT:    vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm1
; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT:    vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm1
; AVX512F-NEXT:    vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm2
; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512DQ-LABEL: constrained_vector_fptoui_v4i64_v4f32:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vmovaps {{.*#+}} xmm0 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1]
; AVX512DQ-NEXT:    vcvttps2uqq %ymm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    retq
entry:
  %result = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(
                                <4 x float><float 42.0, float 43.0,
                                            float 44.0, float 45.0>,
                                metadata !"fpexcept.strict") #0
  ret <4 x i64> %result
}
5136
5137define <1 x i32> @constrained_vector_fptoui_v1i32_v1f64() #0 {
; Strict fptoui of <1 x double> 42.1 -> <1 x i32>. A u32 fits in the signed
; 64-bit range, so SSE/AVX1 use cvttsd2si to a 64-bit reg and truncate;
; AVX512 has a direct unsigned cvttsd2usi to a 32-bit reg.
; CHECK-LABEL: constrained_vector_fptoui_v1i32_v1f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptoui_v1i32_v1f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    # kill: def $eax killed $eax killed $rax
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_fptoui_v1i32_v1f64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; AVX512-NEXT:    retq
entry:
  %result = call <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f64(
                               <1 x double><double 42.1>,
                               metadata !"fpexcept.strict") #0
  ret <1 x i32> %result
}
5160
5161define <2 x i32> @constrained_vector_fptoui_v2i32_v2f64() #0 {
; Strict fptoui of <2 x double> -> <2 x i32>. SSE/AVX1 scalarize via 64-bit
; cvttsd2si then rebuild the vector; AVX512 widens to zmm and uses
; vcvttpd2udq (note the vzeroupper before returning).
; CHECK-LABEL: constrained_vector_fptoui_v2i32_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movd %eax, %xmm1
; CHECK-NEXT:    cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movd %eax, %xmm0
; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptoui_v2i32_v2f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx
; AVX1-NEXT:    vmovd %ecx, %xmm0
; AVX1-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_fptoui_v2i32_v2f64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [4.2100000000000001E+1,4.2200000000000003E+1,0.0E+0,0.0E+0]
; AVX512-NEXT:    vcvttpd2udq %zmm0, %ymm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %result = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(
                                <2 x double><double 42.1, double 42.2>,
                                metadata !"fpexcept.strict") #0
  ret <2 x i32> %result
}
5192
5193define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64() #0 {
; Strict fptoui of the odd-width <3 x double> -> <3 x i32>; all configurations
; scalarize (cvttsd2si / vcvttsd2usi per lane) and assemble with
; punpck/vpinsrd, since there is no 3-wide vector conversion.
; CHECK-LABEL: constrained_vector_fptoui_v3i32_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movd %eax, %xmm1
; CHECK-NEXT:    cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movd %eax, %xmm0
; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movd %eax, %xmm1
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptoui_v3i32_v3f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx
; AVX1-NEXT:    vmovd %ecx, %xmm0
; AVX1-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; AVX1-NEXT:    vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX1-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_fptoui_v3i32_v3f64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; AVX512-NEXT:    vmovd %eax, %xmm0
; AVX512-NEXT:    vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; AVX512-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; AVX512-NEXT:    vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
; AVX512-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; AVX512-NEXT:    retq
entry:
  %result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f64(
                                <3 x double><double 42.1, double 42.2,
                                             double 42.3>,
                                metadata !"fpexcept.strict") #0
  ret <3 x i32> %result
}
5232
5233define <4 x i32> @constrained_vector_fptoui_v4i32_v4f64() #0 {
; Strict fptoui of <4 x double> -> <4 x i32>. SSE scalarizes; AVX1 stays in
; vectors using a 2^31 (2.147483648E+9) bias: compare, conditionally subtract,
; signed vcvttpd2dq, then XOR the 0x80000000 bias back per lane. AVX512 uses
; vcvttpd2udq directly.
; CHECK-LABEL: constrained_vector_fptoui_v4i32_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movd %eax, %xmm0
; CHECK-NEXT:    cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movd %eax, %xmm1
; CHECK-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movd %eax, %xmm2
; CHECK-NEXT:    cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; CHECK-NEXT:    movd %eax, %xmm0
; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptoui_v4i32_v4f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm0 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
; AVX1-NEXT:    vmovapd {{.*#+}} ymm1 = [4.2100000000000001E+1,4.2200000000000003E+1,4.2299999999999997E+1,4.2399999999999999E+1]
; AVX1-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm2
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vshufps {{.*#+}} xmm3 = xmm2[0,2],xmm3[0,2]
; AVX1-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
; AVX1-NEXT:    vblendvps %xmm3, %xmm4, %xmm5, %xmm3
; AVX1-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vblendvpd %ymm2, %ymm4, %ymm0, %ymm0
; AVX1-NEXT:    vsubpd %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vcvttpd2dq %ymm0, %xmm0
; AVX1-NEXT:    vxorpd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_fptoui_v4i32_v4f64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [4.2100000000000001E+1,4.2200000000000003E+1,4.2299999999999997E+1,4.2399999999999999E+1]
; AVX512-NEXT:    vcvttpd2udq %zmm0, %ymm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %result = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64(
                                <4 x double><double 42.1, double 42.2,
                                             double 42.3, double 42.4>,
                                metadata !"fpexcept.strict") #0
  ret <4 x i32> %result
}
5281
5282define <1 x i64> @constrained_vector_fptoui_v1i64_v1f64() #0 {
; Strict fptoui of <1 x double> 42.1 -> <1 x i64>, returned in rax. SSE/AVX1
; apply the scalar 2^63 compare/subtract/cvttsd2si/sign-bit-XOR pattern;
; AVX512 folds it into one vcvttsd2usi load-op.
; CHECK-LABEL: constrained_vector_fptoui_v1i64_v1f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    movsd {{.*#+}} xmm2 = [9.2233720368547758E+18,0.0E+0]
; CHECK-NEXT:    comisd %xmm0, %xmm2
; CHECK-NEXT:    xorpd %xmm1, %xmm1
; CHECK-NEXT:    ja .LBB129_2
; CHECK-NEXT:  # %bb.1: # %entry
; CHECK-NEXT:    movapd %xmm2, %xmm1
; CHECK-NEXT:  .LBB129_2: # %entry
; CHECK-NEXT:    subsd %xmm1, %xmm0
; CHECK-NEXT:    cvttsd2si %xmm0, %rcx
; CHECK-NEXT:    setbe %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    shlq $63, %rax
; CHECK-NEXT:    xorq %rcx, %rax
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptoui_v1i64_v1f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX1-NEXT:    vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
; AVX1-NEXT:    vcomisd %xmm0, %xmm1
; AVX1-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    ja .LBB129_2
; AVX1-NEXT:  # %bb.1: # %entry
; AVX1-NEXT:    vmovapd %xmm1, %xmm2
; AVX1-NEXT:  .LBB129_2: # %entry
; AVX1-NEXT:    vsubsd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX1-NEXT:    setbe %al
; AVX1-NEXT:    movzbl %al, %eax
; AVX1-NEXT:    shlq $63, %rax
; AVX1-NEXT:    xorq %rcx, %rax
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_fptoui_v1i64_v1f64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512-NEXT:    retq
entry:
  %result = call <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f64(
                               <1 x double><double 42.1>,
                               metadata !"fpexcept.strict") #0
  ret <1 x i64> %result
}
5329
5330define <2 x i64> @constrained_vector_fptoui_v2i64_v2f64() #0 {
; Strict fptoui of <2 x double> -> <2 x i64>. Pre-AVX512 scalarizes each lane
; with the 2^63 compare/subtract/cvttsd2si/sign-bit-XOR pattern and repacks
; via punpcklqdq; AVX512F uses scalar vcvttsd2usi, AVX512DQ a single
; vcvttpd2uqq.
; CHECK-LABEL: constrained_vector_fptoui_v2i64_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movsd {{.*#+}} xmm2 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
; CHECK-NEXT:    comisd %xmm2, %xmm1
; CHECK-NEXT:    xorpd %xmm0, %xmm0
; CHECK-NEXT:    xorpd %xmm3, %xmm3
; CHECK-NEXT:    ja .LBB130_2
; CHECK-NEXT:  # %bb.1: # %entry
; CHECK-NEXT:    movapd %xmm1, %xmm3
; CHECK-NEXT:  .LBB130_2: # %entry
; CHECK-NEXT:    subsd %xmm3, %xmm2
; CHECK-NEXT:    cvttsd2si %xmm2, %rax
; CHECK-NEXT:    setbe %cl
; CHECK-NEXT:    movzbl %cl, %ecx
; CHECK-NEXT:    shlq $63, %rcx
; CHECK-NEXT:    xorq %rax, %rcx
; CHECK-NEXT:    movq %rcx, %xmm2
; CHECK-NEXT:    movsd {{.*#+}} xmm3 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    comisd %xmm3, %xmm1
; CHECK-NEXT:    ja .LBB130_4
; CHECK-NEXT:  # %bb.3: # %entry
; CHECK-NEXT:    movapd %xmm1, %xmm0
; CHECK-NEXT:  .LBB130_4: # %entry
; CHECK-NEXT:    subsd %xmm0, %xmm3
; CHECK-NEXT:    cvttsd2si %xmm3, %rax
; CHECK-NEXT:    setbe %cl
; CHECK-NEXT:    movzbl %cl, %ecx
; CHECK-NEXT:    shlq $63, %rcx
; CHECK-NEXT:    xorq %rax, %rcx
; CHECK-NEXT:    movq %rcx, %xmm0
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptoui_v2i64_v2f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovsd {{.*#+}} xmm2 = [4.2200000000000003E+1,0.0E+0]
; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = [9.2233720368547758E+18,0.0E+0]
; AVX1-NEXT:    vcomisd %xmm2, %xmm0
; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    ja .LBB130_2
; AVX1-NEXT:  # %bb.1: # %entry
; AVX1-NEXT:    vmovapd %xmm0, %xmm3
; AVX1-NEXT:  .LBB130_2: # %entry
; AVX1-NEXT:    vsubsd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vcvttsd2si %xmm2, %rax
; AVX1-NEXT:    setbe %cl
; AVX1-NEXT:    movzbl %cl, %ecx
; AVX1-NEXT:    shlq $63, %rcx
; AVX1-NEXT:    xorq %rax, %rcx
; AVX1-NEXT:    vmovq %rcx, %xmm2
; AVX1-NEXT:    vmovsd {{.*#+}} xmm3 = [4.2100000000000001E+1,0.0E+0]
; AVX1-NEXT:    vcomisd %xmm3, %xmm0
; AVX1-NEXT:    ja .LBB130_4
; AVX1-NEXT:  # %bb.3: # %entry
; AVX1-NEXT:    vmovapd %xmm0, %xmm1
; AVX1-NEXT:  .LBB130_4: # %entry
; AVX1-NEXT:    vsubsd %xmm1, %xmm3, %xmm0
; AVX1-NEXT:    vcvttsd2si %xmm0, %rax
; AVX1-NEXT:    setbe %cl
; AVX1-NEXT:    movzbl %cl, %ecx
; AVX1-NEXT:    shlq $63, %rcx
; AVX1-NEXT:    xorq %rax, %rcx
; AVX1-NEXT:    vmovq %rcx, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT:    retq
;
; AVX512F-LABEL: constrained_vector_fptoui_v2i64_v2f64:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm0
; AVX512F-NEXT:    vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512F-NEXT:    vmovq %rax, %xmm1
; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT:    retq
;
; AVX512DQ-LABEL: constrained_vector_fptoui_v2i64_v2f64:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vmovaps {{.*#+}} xmm0 = [4.2100000000000001E+1,4.2200000000000003E+1]
; AVX512DQ-NEXT:    vcvttpd2uqq %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
entry:
  %result = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(
                                <2 x double><double 42.1, double 42.2>,
                                metadata !"fpexcept.strict") #0
  ret <2 x i64> %result
}
5421
5422define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 {
; Strict fptoui of the odd-width <3 x double> -> <3 x i64>. SSE returns the
; three lanes in rax/rdx/rcx; AVX builds a ymm via vinsertf128/vinserti128.
; Pre-AVX512 lanes use the 2^63 compare/subtract/sign-bit-XOR pattern,
; AVX512 uses per-lane vcvttsd2usi.
; CHECK-LABEL: constrained_vector_fptoui_v3i64_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movsd {{.*#+}} xmm2 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
; CHECK-NEXT:    comisd %xmm2, %xmm1
; CHECK-NEXT:    xorpd %xmm0, %xmm0
; CHECK-NEXT:    xorpd %xmm3, %xmm3
; CHECK-NEXT:    ja .LBB131_2
; CHECK-NEXT:  # %bb.1: # %entry
; CHECK-NEXT:    movapd %xmm1, %xmm3
; CHECK-NEXT:  .LBB131_2: # %entry
; CHECK-NEXT:    subsd %xmm3, %xmm2
; CHECK-NEXT:    cvttsd2si %xmm2, %rcx
; CHECK-NEXT:    setbe %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    shlq $63, %rax
; CHECK-NEXT:    xorq %rcx, %rax
; CHECK-NEXT:    movsd {{.*#+}} xmm2 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT:    comisd %xmm2, %xmm1
; CHECK-NEXT:    xorpd %xmm3, %xmm3
; CHECK-NEXT:    ja .LBB131_4
; CHECK-NEXT:  # %bb.3: # %entry
; CHECK-NEXT:    movapd %xmm1, %xmm3
; CHECK-NEXT:  .LBB131_4: # %entry
; CHECK-NEXT:    subsd %xmm3, %xmm2
; CHECK-NEXT:    cvttsd2si %xmm2, %rcx
; CHECK-NEXT:    setbe %dl
; CHECK-NEXT:    movzbl %dl, %edx
; CHECK-NEXT:    shlq $63, %rdx
; CHECK-NEXT:    xorq %rcx, %rdx
; CHECK-NEXT:    movsd {{.*#+}} xmm2 = [4.2299999999999997E+1,0.0E+0]
; CHECK-NEXT:    comisd %xmm2, %xmm1
; CHECK-NEXT:    ja .LBB131_6
; CHECK-NEXT:  # %bb.5: # %entry
; CHECK-NEXT:    movapd %xmm1, %xmm0
; CHECK-NEXT:  .LBB131_6: # %entry
; CHECK-NEXT:    subsd %xmm0, %xmm2
; CHECK-NEXT:    cvttsd2si %xmm2, %rsi
; CHECK-NEXT:    setbe %cl
; CHECK-NEXT:    movzbl %cl, %ecx
; CHECK-NEXT:    shlq $63, %rcx
; CHECK-NEXT:    xorq %rsi, %rcx
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_fptoui_v3i64_v3f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovsd {{.*#+}} xmm2 = [4.2200000000000003E+1,0.0E+0]
; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = [9.2233720368547758E+18,0.0E+0]
; AVX1-NEXT:    vcomisd %xmm2, %xmm0
; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    ja .LBB131_2
; AVX1-NEXT:  # %bb.1: # %entry
; AVX1-NEXT:    vmovapd %xmm0, %xmm3
; AVX1-NEXT:  .LBB131_2: # %entry
; AVX1-NEXT:    vsubsd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vcvttsd2si %xmm2, %rax
; AVX1-NEXT:    setbe %cl
; AVX1-NEXT:    movzbl %cl, %ecx
; AVX1-NEXT:    shlq $63, %rcx
; AVX1-NEXT:    xorq %rax, %rcx
; AVX1-NEXT:    vmovq %rcx, %xmm2
; AVX1-NEXT:    vmovsd {{.*#+}} xmm3 = [4.2100000000000001E+1,0.0E+0]
; AVX1-NEXT:    vcomisd %xmm3, %xmm0
; AVX1-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    ja .LBB131_4
; AVX1-NEXT:  # %bb.3: # %entry
; AVX1-NEXT:    vmovapd %xmm0, %xmm4
; AVX1-NEXT:  .LBB131_4: # %entry
; AVX1-NEXT:    vsubsd %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vcvttsd2si %xmm3, %rax
; AVX1-NEXT:    setbe %cl
; AVX1-NEXT:    movzbl %cl, %ecx
; AVX1-NEXT:    shlq $63, %rcx
; AVX1-NEXT:    xorq %rax, %rcx
; AVX1-NEXT:    vmovq %rcx, %xmm3
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-NEXT:    vmovsd {{.*#+}} xmm3 = [4.2299999999999997E+1,0.0E+0]
; AVX1-NEXT:    vcomisd %xmm3, %xmm0
; AVX1-NEXT:    ja .LBB131_6
; AVX1-NEXT:  # %bb.5: # %entry
; AVX1-NEXT:    vmovapd %xmm0, %xmm1
; AVX1-NEXT:  .LBB131_6: # %entry
; AVX1-NEXT:    vsubsd %xmm1, %xmm3, %xmm0
; AVX1-NEXT:    vcvttsd2si %xmm0, %rax
; AVX1-NEXT:    setbe %cl
; AVX1-NEXT:    movzbl %cl, %ecx
; AVX1-NEXT:    shlq $63, %rcx
; AVX1-NEXT:    xorq %rax, %rcx
; AVX1-NEXT:    vmovq %rcx, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_fptoui_v3i64_v3f64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512-NEXT:    vmovq %rax, %xmm0
; AVX512-NEXT:    vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512-NEXT:    vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
; AVX512-NEXT:    vmovq %rax, %xmm1
; AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
entry:
  %result = call <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f64(
                                <3 x double><double 42.1, double 42.2,
                                             double 42.3>,
                                metadata !"fpexcept.strict") #0
  ret <3 x i64> %result
}
5534
; Strict (exception-preserving) fptoui of a <4 x double> constant to <4 x i64>.
; Pre-AVX512 there is no f64->u64 convert instruction, so each lane is expanded
; to: comisd against 2^63 (9.2233720368547758E+18), conditional subtract of 2^63,
; cvttsd2si, then set the sign bit back via setbe/shlq $63/xorq.
; AVX512F uses scalar vcvttsd2usi per lane; AVX512DQ uses packed vcvttpd2uqq
; (widened to zmm, then the upper half is killed).
5535define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 {
5536; CHECK-LABEL: constrained_vector_fptoui_v4i64_v4f64:
5537; CHECK:       # %bb.0: # %entry
5538; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
5539; CHECK-NEXT:    movsd {{.*#+}} xmm2 = [9.2233720368547758E+18,0.0E+0]
5540; CHECK-NEXT:    comisd %xmm0, %xmm2
5541; CHECK-NEXT:    xorpd %xmm1, %xmm1
5542; CHECK-NEXT:    xorpd %xmm3, %xmm3
5543; CHECK-NEXT:    ja .LBB132_2
5544; CHECK-NEXT:  # %bb.1: # %entry
5545; CHECK-NEXT:    movapd %xmm2, %xmm3
5546; CHECK-NEXT:  .LBB132_2: # %entry
5547; CHECK-NEXT:    subsd %xmm3, %xmm0
5548; CHECK-NEXT:    cvttsd2si %xmm0, %rcx
5549; CHECK-NEXT:    setbe %al
5550; CHECK-NEXT:    movzbl %al, %eax
5551; CHECK-NEXT:    shlq $63, %rax
5552; CHECK-NEXT:    xorq %rcx, %rax
5553; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
5554; CHECK-NEXT:    comisd %xmm0, %xmm2
5555; CHECK-NEXT:    xorpd %xmm4, %xmm4
5556; CHECK-NEXT:    ja .LBB132_4
5557; CHECK-NEXT:  # %bb.3: # %entry
5558; CHECK-NEXT:    movapd %xmm2, %xmm4
5559; CHECK-NEXT:  .LBB132_4: # %entry
5560; CHECK-NEXT:    movq %rax, %xmm3
5561; CHECK-NEXT:    subsd %xmm4, %xmm0
5562; CHECK-NEXT:    cvttsd2si %xmm0, %rax
5563; CHECK-NEXT:    setbe %cl
5564; CHECK-NEXT:    movzbl %cl, %ecx
5565; CHECK-NEXT:    shlq $63, %rcx
5566; CHECK-NEXT:    xorq %rax, %rcx
5567; CHECK-NEXT:    movq %rcx, %xmm0
5568; CHECK-NEXT:    movsd {{.*#+}} xmm4 = [4.2399999999999999E+1,0.0E+0]
5569; CHECK-NEXT:    comisd %xmm4, %xmm2
5570; CHECK-NEXT:    xorpd %xmm5, %xmm5
5571; CHECK-NEXT:    ja .LBB132_6
5572; CHECK-NEXT:  # %bb.5: # %entry
5573; CHECK-NEXT:    movapd %xmm2, %xmm5
5574; CHECK-NEXT:  .LBB132_6: # %entry
5575; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
5576; CHECK-NEXT:    subsd %xmm5, %xmm4
5577; CHECK-NEXT:    cvttsd2si %xmm4, %rax
5578; CHECK-NEXT:    setbe %cl
5579; CHECK-NEXT:    movzbl %cl, %ecx
5580; CHECK-NEXT:    shlq $63, %rcx
5581; CHECK-NEXT:    xorq %rax, %rcx
5582; CHECK-NEXT:    movq %rcx, %xmm3
5583; CHECK-NEXT:    movsd {{.*#+}} xmm4 = [4.2299999999999997E+1,0.0E+0]
5584; CHECK-NEXT:    comisd %xmm4, %xmm2
5585; CHECK-NEXT:    ja .LBB132_8
5586; CHECK-NEXT:  # %bb.7: # %entry
5587; CHECK-NEXT:    movapd %xmm2, %xmm1
5588; CHECK-NEXT:  .LBB132_8: # %entry
5589; CHECK-NEXT:    subsd %xmm1, %xmm4
5590; CHECK-NEXT:    cvttsd2si %xmm4, %rax
5591; CHECK-NEXT:    setbe %cl
5592; CHECK-NEXT:    movzbl %cl, %ecx
5593; CHECK-NEXT:    shlq $63, %rcx
5594; CHECK-NEXT:    xorq %rax, %rcx
5595; CHECK-NEXT:    movq %rcx, %xmm1
5596; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
5597; CHECK-NEXT:    retq
5598;
5599; AVX1-LABEL: constrained_vector_fptoui_v4i64_v4f64:
5600; AVX1:       # %bb.0: # %entry
5601; AVX1-NEXT:    vmovsd {{.*#+}} xmm2 = [4.2399999999999999E+1,0.0E+0]
5602; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = [9.2233720368547758E+18,0.0E+0]
5603; AVX1-NEXT:    vcomisd %xmm2, %xmm0
5604; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
5605; AVX1-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
5606; AVX1-NEXT:    ja .LBB132_2
5607; AVX1-NEXT:  # %bb.1: # %entry
5608; AVX1-NEXT:    vmovapd %xmm0, %xmm3
5609; AVX1-NEXT:  .LBB132_2: # %entry
5610; AVX1-NEXT:    vsubsd %xmm3, %xmm2, %xmm2
5611; AVX1-NEXT:    vcvttsd2si %xmm2, %rcx
5612; AVX1-NEXT:    setbe %al
5613; AVX1-NEXT:    movzbl %al, %eax
5614; AVX1-NEXT:    shlq $63, %rax
5615; AVX1-NEXT:    xorq %rcx, %rax
5616; AVX1-NEXT:    vmovsd {{.*#+}} xmm3 = [4.2299999999999997E+1,0.0E+0]
5617; AVX1-NEXT:    vcomisd %xmm3, %xmm0
5618; AVX1-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
5619; AVX1-NEXT:    ja .LBB132_4
5620; AVX1-NEXT:  # %bb.3: # %entry
5621; AVX1-NEXT:    vmovapd %xmm0, %xmm4
5622; AVX1-NEXT:  .LBB132_4: # %entry
5623; AVX1-NEXT:    vmovq %rax, %xmm2
5624; AVX1-NEXT:    vsubsd %xmm4, %xmm3, %xmm3
5625; AVX1-NEXT:    vcvttsd2si %xmm3, %rax
5626; AVX1-NEXT:    setbe %cl
5627; AVX1-NEXT:    movzbl %cl, %ecx
5628; AVX1-NEXT:    shlq $63, %rcx
5629; AVX1-NEXT:    xorq %rax, %rcx
5630; AVX1-NEXT:    vmovq %rcx, %xmm3
5631; AVX1-NEXT:    vmovsd {{.*#+}} xmm4 = [4.2200000000000003E+1,0.0E+0]
5632; AVX1-NEXT:    vcomisd %xmm4, %xmm0
5633; AVX1-NEXT:    vxorpd %xmm5, %xmm5, %xmm5
5634; AVX1-NEXT:    ja .LBB132_6
5635; AVX1-NEXT:  # %bb.5: # %entry
5636; AVX1-NEXT:    vmovapd %xmm0, %xmm5
5637; AVX1-NEXT:  .LBB132_6: # %entry
5638; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
5639; AVX1-NEXT:    vsubsd %xmm5, %xmm4, %xmm3
5640; AVX1-NEXT:    vcvttsd2si %xmm3, %rax
5641; AVX1-NEXT:    setbe %cl
5642; AVX1-NEXT:    movzbl %cl, %ecx
5643; AVX1-NEXT:    shlq $63, %rcx
5644; AVX1-NEXT:    xorq %rax, %rcx
5645; AVX1-NEXT:    vmovq %rcx, %xmm3
5646; AVX1-NEXT:    vmovsd {{.*#+}} xmm4 = [4.2100000000000001E+1,0.0E+0]
5647; AVX1-NEXT:    vcomisd %xmm4, %xmm0
5648; AVX1-NEXT:    ja .LBB132_8
5649; AVX1-NEXT:  # %bb.7: # %entry
5650; AVX1-NEXT:    vmovapd %xmm0, %xmm1
5651; AVX1-NEXT:  .LBB132_8: # %entry
5652; AVX1-NEXT:    vsubsd %xmm1, %xmm4, %xmm0
5653; AVX1-NEXT:    vcvttsd2si %xmm0, %rax
5654; AVX1-NEXT:    setbe %cl
5655; AVX1-NEXT:    movzbl %cl, %ecx
5656; AVX1-NEXT:    shlq $63, %rcx
5657; AVX1-NEXT:    xorq %rax, %rcx
5658; AVX1-NEXT:    vmovq %rcx, %xmm0
5659; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
5660; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
5661; AVX1-NEXT:    retq
5662;
5663; AVX512F-LABEL: constrained_vector_fptoui_v4i64_v4f64:
5664; AVX512F:       # %bb.0: # %entry
5665; AVX512F-NEXT:    vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
5666; AVX512F-NEXT:    vmovq %rax, %xmm0
5667; AVX512F-NEXT:    vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
5668; AVX512F-NEXT:    vmovq %rax, %xmm1
5669; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
5670; AVX512F-NEXT:    vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
5671; AVX512F-NEXT:    vmovq %rax, %xmm1
5672; AVX512F-NEXT:    vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax
5673; AVX512F-NEXT:    vmovq %rax, %xmm2
5674; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
5675; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
5676; AVX512F-NEXT:    retq
5677;
5678; AVX512DQ-LABEL: constrained_vector_fptoui_v4i64_v4f64:
5679; AVX512DQ:       # %bb.0: # %entry
5680; AVX512DQ-NEXT:    vmovaps {{.*#+}} ymm0 = [4.2100000000000001E+1,4.2200000000000003E+1,4.2299999999999997E+1,4.2399999999999999E+1]
5681; AVX512DQ-NEXT:    vcvttpd2uqq %zmm0, %zmm0
5682; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
5683; AVX512DQ-NEXT:    retq
5684entry:
5685  %result = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f64(
5686                                <4 x double><double 42.1, double 42.2,
5687                                             double 42.3, double 42.4>,
5688                                metadata !"fpexcept.strict") #0
5689  ret <4 x i64> %result
5690}
5691
5692
; Strict fptrunc of a <1 x double> constant to <1 x float>: lowers to a single
; scalar cvtsd2ss / vcvtsd2ss, which raises FP exceptions as required by
; fpexcept.strict.
5693define <1 x float> @constrained_vector_fptrunc_v1f64() #0 {
5694; CHECK-LABEL: constrained_vector_fptrunc_v1f64:
5695; CHECK:       # %bb.0: # %entry
5696; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
5697; CHECK-NEXT:    cvtsd2ss %xmm0, %xmm0
5698; CHECK-NEXT:    retq
5699;
5700; AVX-LABEL: constrained_vector_fptrunc_v1f64:
5701; AVX:       # %bb.0: # %entry
5702; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
5703; AVX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
5704; AVX-NEXT:    retq
5705entry:
5706  %result = call <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(
5707                                <1 x double><double 42.1>,
5708                                metadata !"round.dynamic",
5709                                metadata !"fpexcept.strict") #0
5710  ret <1 x float> %result
5711}
5712
; Strict fptrunc of <2 x double> to <2 x float>: lowers to a single packed
; cvtpd2ps (AVX: vcvtpd2psx) with a memory operand from the constant pool.
5713define <2 x float> @constrained_vector_fptrunc_v2f64() #0 {
5714; CHECK-LABEL: constrained_vector_fptrunc_v2f64:
5715; CHECK:       # %bb.0: # %entry
5716; CHECK-NEXT:    cvtpd2ps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5717; CHECK-NEXT:    retq
5718;
5719; AVX-LABEL: constrained_vector_fptrunc_v2f64:
5720; AVX:       # %bb.0: # %entry
5721; AVX-NEXT:    vcvtpd2psx {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5722; AVX-NEXT:    retq
5723entry:
5724  %result = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
5725                                <2 x double><double 42.1, double 42.2>,
5726                                metadata !"round.dynamic",
5727                                metadata !"fpexcept.strict") #0
5728  ret <2 x float> %result
5729}
5730
; Strict fptrunc of the odd-sized <3 x double> to <3 x float>: scalarized into
; three cvtsd2ss conversions, then reassembled with unpcklps/movlhps (SSE) or
; vinsertps (AVX). Scalarizing keeps the strict-FP exception behavior per lane.
5731define <3 x float> @constrained_vector_fptrunc_v3f64() #0 {
5732; CHECK-LABEL: constrained_vector_fptrunc_v3f64:
5733; CHECK:       # %bb.0: # %entry
5734; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
5735; CHECK-NEXT:    cvtsd2ss %xmm0, %xmm1
5736; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
5737; CHECK-NEXT:    cvtsd2ss %xmm0, %xmm0
5738; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
5739; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [4.2299999999999997E+1,0.0E+0]
5740; CHECK-NEXT:    cvtsd2ss %xmm1, %xmm1
5741; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
5742; CHECK-NEXT:    retq
5743;
5744; AVX-LABEL: constrained_vector_fptrunc_v3f64:
5745; AVX:       # %bb.0: # %entry
5746; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
5747; AVX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
5748; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.2100000000000001E+1,0.0E+0]
5749; AVX-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1
5750; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
5751; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.2299999999999997E+1,0.0E+0]
5752; AVX-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1
5753; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
5754; AVX-NEXT:    retq
5755entry:
5756  %result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(
5757                                <3 x double><double 42.1, double 42.2,
5758                                             double 42.3>,
5759                                metadata !"round.dynamic",
5760                                metadata !"fpexcept.strict") #0
5761  ret <3 x float> %result
5762}
5763
; Strict fptrunc of <4 x double> to <4 x float>: SSE splits it into two
; 2-element cvtpd2ps plus an unpcklpd merge; AVX does it in one 256-bit
; vcvtpd2psy from the constant pool.
5764define <4 x float> @constrained_vector_fptrunc_v4f64() #0 {
5765; CHECK-LABEL: constrained_vector_fptrunc_v4f64:
5766; CHECK:       # %bb.0: # %entry
5767; CHECK-NEXT:    cvtpd2ps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5768; CHECK-NEXT:    cvtpd2ps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5769; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
5770; CHECK-NEXT:    retq
5771;
5772; AVX-LABEL: constrained_vector_fptrunc_v4f64:
5773; AVX:       # %bb.0: # %entry
5774; AVX-NEXT:    vcvtpd2psy {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5775; AVX-NEXT:    retq
5776entry:
5777  %result = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(
5778                                <4 x double><double 42.1, double 42.2,
5779                                             double 42.3, double 42.4>,
5780                                metadata !"round.dynamic",
5781                                metadata !"fpexcept.strict") #0
5782  ret <4 x float> %result
5783}
5784
; Strict fpext of <1 x float> to <1 x double>: a single scalar cvtss2sd /
; vcvtss2sd. No rounding metadata is needed since fpext is exact.
5785define <1 x double> @constrained_vector_fpext_v1f32() #0 {
5786; CHECK-LABEL: constrained_vector_fpext_v1f32:
5787; CHECK:       # %bb.0: # %entry
5788; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
5789; CHECK-NEXT:    cvtss2sd %xmm0, %xmm0
5790; CHECK-NEXT:    retq
5791;
5792; AVX-LABEL: constrained_vector_fpext_v1f32:
5793; AVX:       # %bb.0: # %entry
5794; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
5795; AVX-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
5796; AVX-NEXT:    retq
5797entry:
5798  %result = call <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(
5799                                <1 x float><float 42.0>,
5800                                metadata !"fpexcept.strict") #0
5801  ret <1 x double> %result
5802}
5803
; Strict fpext of <2 x float> to <2 x double>: one packed cvtps2pd with a
; constant-pool memory operand on both SSE and AVX.
5804define <2 x double> @constrained_vector_fpext_v2f32() #0 {
5805; CHECK-LABEL: constrained_vector_fpext_v2f32:
5806; CHECK:       # %bb.0: # %entry
5807; CHECK-NEXT:    cvtps2pd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5808; CHECK-NEXT:    retq
5809;
5810; AVX-LABEL: constrained_vector_fpext_v2f32:
5811; AVX:       # %bb.0: # %entry
5812; AVX-NEXT:    vcvtps2pd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5813; AVX-NEXT:    retq
5814entry:
5815  %result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(
5816                                <2 x float><float 42.0, float 43.0>,
5817                                metadata !"fpexcept.strict") #0
5818  ret <2 x double> %result
5819}
5820
; Strict fpext of <3 x float> to <3 x double>, scalarized to cvtss2sd. In the
; SSE lowering the third element is returned on the x87 stack (spill + fldl),
; followed by a `wait` to keep strict-FP exception ordering; AVX returns the
; whole result in ymm0 via vmovlhps + vinsertf128.
5821define <3 x double> @constrained_vector_fpext_v3f32() #0 {
5822; CHECK-LABEL: constrained_vector_fpext_v3f32:
5823; CHECK:       # %bb.0: # %entry
5824; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
5825; CHECK-NEXT:    cvtss2sd %xmm0, %xmm1
5826; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
5827; CHECK-NEXT:    cvtss2sd %xmm0, %xmm0
5828; CHECK-NEXT:    movss {{.*#+}} xmm2 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
5829; CHECK-NEXT:    cvtss2sd %xmm2, %xmm2
5830; CHECK-NEXT:    movsd %xmm2, -{{[0-9]+}}(%rsp)
5831; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
5832; CHECK-NEXT:    wait
5833; CHECK-NEXT:    retq
5834;
5835; AVX-LABEL: constrained_vector_fpext_v3f32:
5836; AVX:       # %bb.0: # %entry
5837; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
5838; AVX-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
5839; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
5840; AVX-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
5841; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
5842; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
5843; AVX-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
5844; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
5845; AVX-NEXT:    retq
5846entry:
5847  %result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(
5848                                <3 x float><float 42.0, float 43.0,
5849                                            float 44.0>,
5850                                metadata !"fpexcept.strict") #0
5851  ret <3 x double> %result
5852}
5853
; Strict fpext of <4 x float> to <4 x double>: SSE needs two 128-bit cvtps2pd
; (one per returned xmm); AVX does it with a single 256-bit vcvtps2pd.
5854define <4 x double> @constrained_vector_fpext_v4f32() #0 {
5855; CHECK-LABEL: constrained_vector_fpext_v4f32:
5856; CHECK:       # %bb.0: # %entry
5857; CHECK-NEXT:    cvtps2pd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5858; CHECK-NEXT:    cvtps2pd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5859; CHECK-NEXT:    retq
5860;
5861; AVX-LABEL: constrained_vector_fpext_v4f32:
5862; AVX:       # %bb.0: # %entry
5863; AVX-NEXT:    vcvtps2pd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
5864; AVX-NEXT:    retq
5865entry:
5866  %result = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(
5867                                <4 x float><float 42.0, float 43.0,
5868                                            float 44.0, float 45.0>,
5869                                metadata !"fpexcept.strict") #0
5870  ret <4 x double> %result
5871}
5872
; Strict ceil on a loaded <1 x float>. Baseline SSE has no rounding
; instruction, so it libcalls ceilf; AVX uses vroundss with imm 10
; (round toward +inf, inexact exception suppressed).
5873define <1 x float> @constrained_vector_ceil_v1f32_var(ptr %a) #0 {
5874; CHECK-LABEL: constrained_vector_ceil_v1f32_var:
5875; CHECK:       # %bb.0: # %entry
5876; CHECK-NEXT:    pushq %rax
5877; CHECK-NEXT:    .cfi_def_cfa_offset 16
5878; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
5879; CHECK-NEXT:    callq ceilf@PLT
5880; CHECK-NEXT:    popq %rax
5881; CHECK-NEXT:    .cfi_def_cfa_offset 8
5882; CHECK-NEXT:    retq
5883;
5884; AVX-LABEL: constrained_vector_ceil_v1f32_var:
5885; AVX:       # %bb.0: # %entry
5886; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
5887; AVX-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0
5888; AVX-NEXT:    retq
5889entry:
5890  %b = load <1 x float>, ptr %a
5891  %ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32(
5892                               <1 x float> %b,
5893                               metadata !"fpexcept.strict") #0
5894  ret <1 x float> %ceil
5895}
5896
; Strict ceil on a loaded <2 x double>. SSE scalarizes to two ceil libcalls
; with spill/reload around them; AVX folds the load into one vroundpd $10.
5897define <2 x double> @constrained_vector_ceil_v2f64_var(ptr %a) #0 {
5898; CHECK-LABEL: constrained_vector_ceil_v2f64_var:
5899; CHECK:       # %bb.0: # %entry
5900; CHECK-NEXT:    subq $40, %rsp
5901; CHECK-NEXT:    .cfi_def_cfa_offset 48
5902; CHECK-NEXT:    movaps (%rdi), %xmm0
5903; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
5904; CHECK-NEXT:    callq ceil@PLT
5905; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5906; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
5907; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
5908; CHECK-NEXT:    callq ceil@PLT
5909; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
5910; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
5911; CHECK-NEXT:    movaps %xmm1, %xmm0
5912; CHECK-NEXT:    addq $40, %rsp
5913; CHECK-NEXT:    .cfi_def_cfa_offset 8
5914; CHECK-NEXT:    retq
5915;
5916; AVX-LABEL: constrained_vector_ceil_v2f64_var:
5917; AVX:       # %bb.0: # %entry
5918; AVX-NEXT:    vroundpd $10, (%rdi), %xmm0
5919; AVX-NEXT:    retq
5920entry:
5921  %b = load <2 x double>, ptr %a
5922  %ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
5923                                <2 x double> %b,
5924                                metadata !"fpexcept.strict") #0
5925  ret <2 x double> %ceil
5926}
5927
; Strict ceil on a loaded <3 x float>. SSE scalarizes into three ceilf
; libcalls and rebuilds the vector with unpcklps/unpcklpd; AVX uses three
; scalar vroundss $10 plus vinsertps (no packed 96-bit form exists).
5928define <3 x float> @constrained_vector_ceil_v3f32_var(ptr %a) #0 {
5929; CHECK-LABEL: constrained_vector_ceil_v3f32_var:
5930; CHECK:       # %bb.0: # %entry
5931; CHECK-NEXT:    subq $56, %rsp
5932; CHECK-NEXT:    .cfi_def_cfa_offset 64
5933; CHECK-NEXT:    movaps (%rdi), %xmm0
5934; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
5935; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
5936; CHECK-NEXT:    callq ceilf@PLT
5937; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5938; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
5939; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
5940; CHECK-NEXT:    callq ceilf@PLT
5941; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5942; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
5943; CHECK-NEXT:    callq ceilf@PLT
5944; CHECK-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
5945; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
5946; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
5947; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
5948; CHECK-NEXT:    addq $56, %rsp
5949; CHECK-NEXT:    .cfi_def_cfa_offset 8
5950; CHECK-NEXT:    retq
5951;
5952; AVX-LABEL: constrained_vector_ceil_v3f32_var:
5953; AVX:       # %bb.0: # %entry
5954; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
5955; AVX-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0
5956; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
5957; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
5958; AVX-NEXT:    vroundss $10, %xmm1, %xmm1, %xmm1
5959; AVX-NEXT:    vroundss $10, %xmm2, %xmm2, %xmm2
5960; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
5961; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
5962; AVX-NEXT:    retq
5963entry:
5964  %b = load <3 x float>, ptr %a
5965  %ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32(
5966                              <3 x float> %b,
5967                              metadata !"fpexcept.strict") #0
5968  ret <3 x float> %ceil
5969}
5970
; Strict ceil on a loaded <3 x double>. SSE makes three ceil libcalls and
; returns the third lane on the x87 stack (fldl + wait); AVX combines a
; packed vroundpd $10 for the low two lanes with a scalar vroundsd $10 for
; the third, merged via vinsertf128.
5971define <3 x double> @constrained_vector_ceil_v3f64_var(ptr %a) #0 {
5972; CHECK-LABEL: constrained_vector_ceil_v3f64_var:
5973; CHECK:       # %bb.0: # %entry
5974; CHECK-NEXT:    subq $40, %rsp
5975; CHECK-NEXT:    .cfi_def_cfa_offset 48
5976; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
5977; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
5978; CHECK-NEXT:    movaps (%rdi), %xmm0
5979; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
5980; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
5981; CHECK-NEXT:    callq ceil@PLT
5982; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
5983; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
5984; CHECK-NEXT:    callq ceil@PLT
5985; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
5986; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
5987; CHECK-NEXT:    # xmm0 = mem[0],zero
5988; CHECK-NEXT:    callq ceil@PLT
5989; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
5990; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
5991; CHECK-NEXT:    wait
5992; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
5993; CHECK-NEXT:    # xmm0 = mem[0],zero
5994; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
5995; CHECK-NEXT:    # xmm1 = mem[0],zero
5996; CHECK-NEXT:    addq $40, %rsp
5997; CHECK-NEXT:    .cfi_def_cfa_offset 8
5998; CHECK-NEXT:    retq
5999;
6000; AVX-LABEL: constrained_vector_ceil_v3f64_var:
6001; AVX:       # %bb.0: # %entry
6002; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
6003; AVX-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm0
6004; AVX-NEXT:    vroundpd $10, (%rdi), %xmm1
6005; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
6006; AVX-NEXT:    retq
6007entry:
6008  %b = load <3 x double>, ptr %a
6009  %ceil = call <3 x double> @llvm.experimental.constrained.ceil.v3f64(
6010                          <3 x double> %b,
6011                          metadata !"fpexcept.strict") #0
6012  ret <3 x double> %ceil
6013}
6014
; Strict floor on a loaded <1 x float>: SSE libcalls floorf; AVX uses
; vroundss with imm 9 (round toward -inf, inexact exception suppressed).
6015define <1 x float> @constrained_vector_floor_v1f32_var(ptr %a) #0 {
6016; CHECK-LABEL: constrained_vector_floor_v1f32_var:
6017; CHECK:       # %bb.0: # %entry
6018; CHECK-NEXT:    pushq %rax
6019; CHECK-NEXT:    .cfi_def_cfa_offset 16
6020; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
6021; CHECK-NEXT:    callq floorf@PLT
6022; CHECK-NEXT:    popq %rax
6023; CHECK-NEXT:    .cfi_def_cfa_offset 8
6024; CHECK-NEXT:    retq
6025;
6026; AVX-LABEL: constrained_vector_floor_v1f32_var:
6027; AVX:       # %bb.0: # %entry
6028; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
6029; AVX-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0
6030; AVX-NEXT:    retq
6031entry:
6032  %b = load <1 x float>, ptr %a
6033  %floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32(
6034                               <1 x float> %b,
6035                               metadata !"fpexcept.strict") #0
6036  ret <1 x float> %floor
6037}
6038
6039
; Strict floor on a loaded <2 x double>: SSE scalarizes to two floor libcalls;
; AVX folds the load into a single vroundpd $9.
6040define <2 x double> @constrained_vector_floor_v2f64_var(ptr %a) #0 {
6041; CHECK-LABEL: constrained_vector_floor_v2f64_var:
6042; CHECK:       # %bb.0: # %entry
6043; CHECK-NEXT:    subq $40, %rsp
6044; CHECK-NEXT:    .cfi_def_cfa_offset 48
6045; CHECK-NEXT:    movaps (%rdi), %xmm0
6046; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
6047; CHECK-NEXT:    callq floor@PLT
6048; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
6049; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
6050; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
6051; CHECK-NEXT:    callq floor@PLT
6052; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
6053; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
6054; CHECK-NEXT:    movaps %xmm1, %xmm0
6055; CHECK-NEXT:    addq $40, %rsp
6056; CHECK-NEXT:    .cfi_def_cfa_offset 8
6057; CHECK-NEXT:    retq
6058;
6059; AVX-LABEL: constrained_vector_floor_v2f64_var:
6060; AVX:       # %bb.0: # %entry
6061; AVX-NEXT:    vroundpd $9, (%rdi), %xmm0
6062; AVX-NEXT:    retq
6063entry:
6064  %b = load <2 x double>, ptr %a
6065  %floor = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
6066                                <2 x double> %b,
6067                                metadata !"fpexcept.strict") #0
6068  ret <2 x double> %floor
6069}
6070
; Strict floor on a loaded <3 x float>: SSE scalarizes to three floorf
; libcalls and rebuilds with unpcklps/unpcklpd; AVX uses three vroundss $9
; plus vinsertps. Mirrors the v3f32 ceil test with opposite rounding mode.
6071define <3 x float> @constrained_vector_floor_v3f32_var(ptr %a) #0 {
6072; CHECK-LABEL: constrained_vector_floor_v3f32_var:
6073; CHECK:       # %bb.0: # %entry
6074; CHECK-NEXT:    subq $56, %rsp
6075; CHECK-NEXT:    .cfi_def_cfa_offset 64
6076; CHECK-NEXT:    movaps (%rdi), %xmm0
6077; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
6078; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
6079; CHECK-NEXT:    callq floorf@PLT
6080; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
6081; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
6082; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
6083; CHECK-NEXT:    callq floorf@PLT
6084; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
6085; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
6086; CHECK-NEXT:    callq floorf@PLT
6087; CHECK-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
6088; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
6089; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
6090; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
6091; CHECK-NEXT:    addq $56, %rsp
6092; CHECK-NEXT:    .cfi_def_cfa_offset 8
6093; CHECK-NEXT:    retq
6094;
6095; AVX-LABEL: constrained_vector_floor_v3f32_var:
6096; AVX:       # %bb.0: # %entry
6097; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
6098; AVX-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0
6099; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
6100; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
6101; AVX-NEXT:    vroundss $9, %xmm1, %xmm1, %xmm1
6102; AVX-NEXT:    vroundss $9, %xmm2, %xmm2, %xmm2
6103; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
6104; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
6105; AVX-NEXT:    retq
6106entry:
6107  %b = load <3 x float>, ptr %a
6108  %floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32(
6109                              <3 x float> %b,
6110                              metadata !"fpexcept.strict") #0
6111  ret <3 x float> %floor
6112}
6113
; Strict floor on a loaded <3 x double>: SSE makes three floor libcalls and
; returns the third lane via the x87 stack (fldl + wait); AVX combines
; vroundpd $9 for the low lanes with vroundsd $9 for the third, merged with
; vinsertf128. Mirrors the v3f64 ceil test with opposite rounding mode.
6114define <3 x double> @constrained_vector_floor_v3f64_var(ptr %a) #0 {
6115; CHECK-LABEL: constrained_vector_floor_v3f64_var:
6116; CHECK:       # %bb.0: # %entry
6117; CHECK-NEXT:    subq $40, %rsp
6118; CHECK-NEXT:    .cfi_def_cfa_offset 48
6119; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
6120; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
6121; CHECK-NEXT:    movaps (%rdi), %xmm0
6122; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
6123; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
6124; CHECK-NEXT:    callq floor@PLT
6125; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
6126; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
6127; CHECK-NEXT:    callq floor@PLT
6128; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
6129; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
6130; CHECK-NEXT:    # xmm0 = mem[0],zero
6131; CHECK-NEXT:    callq floor@PLT
6132; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
6133; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
6134; CHECK-NEXT:    wait
6135; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
6136; CHECK-NEXT:    # xmm0 = mem[0],zero
6137; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
6138; CHECK-NEXT:    # xmm1 = mem[0],zero
6139; CHECK-NEXT:    addq $40, %rsp
6140; CHECK-NEXT:    .cfi_def_cfa_offset 8
6141; CHECK-NEXT:    retq
6142;
6143; AVX-LABEL: constrained_vector_floor_v3f64_var:
6144; AVX:       # %bb.0: # %entry
6145; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
6146; AVX-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm0
6147; AVX-NEXT:    vroundpd $9, (%rdi), %xmm1
6148; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
6149; AVX-NEXT:    retq
6150entry:
6151  %b = load <3 x double>, ptr %a
6152  %floor = call <3 x double> @llvm.experimental.constrained.floor.v3f64(
6153                          <3 x double> %b,
6154                          metadata !"fpexcept.strict") #0
6155  ret <3 x double> %floor
6156}
6157
; Strict round (round-half-away-from-zero) on a loaded <1 x float>. Unlike
; ceil/floor there is no matching SSE4.1 ROUNDSS immediate for this mode, so
; BOTH the SSE and AVX lowerings call the roundf libcall.
6158define <1 x float> @constrained_vector_round_v1f32_var(ptr %a) #0 {
6159; CHECK-LABEL: constrained_vector_round_v1f32_var:
6160; CHECK:       # %bb.0: # %entry
6161; CHECK-NEXT:    pushq %rax
6162; CHECK-NEXT:    .cfi_def_cfa_offset 16
6163; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
6164; CHECK-NEXT:    callq roundf@PLT
6165; CHECK-NEXT:    popq %rax
6166; CHECK-NEXT:    .cfi_def_cfa_offset 8
6167; CHECK-NEXT:    retq
6168;
6169; AVX-LABEL: constrained_vector_round_v1f32_var:
6170; AVX:       # %bb.0: # %entry
6171; AVX-NEXT:    pushq %rax
6172; AVX-NEXT:    .cfi_def_cfa_offset 16
6173; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
6174; AVX-NEXT:    callq roundf@PLT
6175; AVX-NEXT:    popq %rax
6176; AVX-NEXT:    .cfi_def_cfa_offset 8
6177; AVX-NEXT:    retq
6178entry:
6179  %b = load <1 x float>, ptr %a
6180  %round = call <1 x float> @llvm.experimental.constrained.round.v1f32(
6181                               <1 x float> %b,
6182                               metadata !"fpexcept.strict") #0
6183  ret <1 x float> %round
6184}
6185
; Strict round on a loaded <2 x double>: no hardware instruction for this
; rounding mode, so both SSE and AVX scalarize into two round libcalls with
; spill/reload around the calls, then rebuild the vector (movlhps/vunpcklpd).
6186define <2 x double> @constrained_vector_round_v2f64_var(ptr %a) #0 {
6187; CHECK-LABEL: constrained_vector_round_v2f64_var:
6188; CHECK:       # %bb.0: # %entry
6189; CHECK-NEXT:    subq $40, %rsp
6190; CHECK-NEXT:    .cfi_def_cfa_offset 48
6191; CHECK-NEXT:    movaps (%rdi), %xmm0
6192; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
6193; CHECK-NEXT:    callq round@PLT
6194; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
6195; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
6196; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
6197; CHECK-NEXT:    callq round@PLT
6198; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
6199; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
6200; CHECK-NEXT:    movaps %xmm1, %xmm0
6201; CHECK-NEXT:    addq $40, %rsp
6202; CHECK-NEXT:    .cfi_def_cfa_offset 8
6203; CHECK-NEXT:    retq
6204;
6205; AVX-LABEL: constrained_vector_round_v2f64_var:
6206; AVX:       # %bb.0: # %entry
6207; AVX-NEXT:    subq $40, %rsp
6208; AVX-NEXT:    .cfi_def_cfa_offset 48
6209; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
6210; AVX-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
6211; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
6212; AVX-NEXT:    callq round@PLT
6213; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
6214; AVX-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
6215; AVX-NEXT:    # xmm0 = mem[0],zero
6216; AVX-NEXT:    callq round@PLT
6217; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
6218; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
6219; AVX-NEXT:    addq $40, %rsp
6220; AVX-NEXT:    .cfi_def_cfa_offset 8
6221; AVX-NEXT:    retq
6222entry:
6223  %b = load <2 x double>, ptr %a
6224  %round = call <2 x double> @llvm.experimental.constrained.round.v2f64(
6225                                <2 x double> %b,
6226                                metadata !"fpexcept.strict") #0
6227  ret <2 x double> %round
6228}
6229
; Strict-FP round of a <3 x float> loaded from memory. llvm.round has no
; direct SSE4.1/AVX encoding (it rounds halfway cases away from zero), so
; both configurations lower to three roundf libcalls and rebuild the vector.
define <3 x float> @constrained_vector_round_v3f32_var(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_round_v3f32_var:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $56, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 64
; CHECK-NEXT:    movaps (%rdi), %xmm0
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT:    callq roundf@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT:    callq roundf@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    callq roundf@PLT
; CHECK-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $56, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_round_v3f32_var:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rbx
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    subq $48, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 64
; AVX-NEXT:    .cfi_offset %rbx, -16
; AVX-NEXT:    movq %rdi, %rbx
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq roundf@PLT
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; AVX-NEXT:    callq roundf@PLT
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; AVX-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    callq roundf@PLT
; AVX-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $48, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    popq %rbx
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %b = load <3 x float>, ptr %a
  %round = call <3 x float> @llvm.experimental.constrained.round.v3f32(
                              <3 x float> %b,
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %round
}
6289
6290
; Strict-FP round of a <3 x double> loaded from memory; lowered to three
; round libcalls. The odd third double is returned on the x87 stack (fldl)
; in the SSE configuration, per the SysV return convention for <3 x double>.
define <3 x double> @constrained_vector_round_v3f64_var(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_round_v3f64_var:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movaps (%rdi), %xmm0
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT:    callq round@PLT
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    callq round@PLT
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    callq round@PLT
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_round_v3f64_var:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rbx
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    subq $48, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 64
; AVX-NEXT:    .cfi_offset %rbx, -16
; AVX-NEXT:    movq %rdi, %rbx
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq round@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; AVX-NEXT:    # xmm0 = mem[0],zero
; AVX-NEXT:    callq round@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq round@PLT
; AVX-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $48, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    popq %rbx
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %b = load <3 x double>, ptr %a
  %round = call <3 x double> @llvm.experimental.constrained.round.v3f64(
                          <3 x double> %b,
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %round
}
6356
; Strict-FP trunc of a <1 x float> loaded from memory. Unlike round, trunc
; maps to a rounding-control immediate: AVX uses vroundss $11 (truncate,
; suppress-precision-exception clear), while plain SSE2 falls back to truncf.
define <1 x float> @constrained_vector_trunc_v1f32_var(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_trunc_v1f32_var:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    callq truncf@PLT
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_trunc_v1f32_var:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %b = load <1 x float>, ptr %a
  %trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32(
                               <1 x float> %b,
                               metadata !"fpexcept.strict") #0
  ret <1 x float> %trunc
}
6380
; Strict-FP trunc of a <2 x double> loaded from memory: two trunc libcalls
; under SSE2, a single folded-load vroundpd $11 under AVX.
define <2 x double> @constrained_vector_trunc_v2f64_var(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_trunc_v2f64_var:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movaps (%rdi), %xmm0
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    callq trunc@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT:    callq trunc@PLT
; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_trunc_v2f64_var:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vroundpd $11, (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %b = load <2 x double>, ptr %a
  %trunc = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
                                <2 x double> %b,
                                metadata !"fpexcept.strict") #0
  ret <2 x double> %trunc
}
6411
; Strict-FP trunc of a <3 x float> loaded from memory: three truncf libcalls
; under SSE2; three scalar vroundss $11 plus vinsertps re-assembly under AVX
; (no full-vector form for the odd 3-element width).
define <3 x float> @constrained_vector_trunc_v3f32_var(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_trunc_v3f32_var:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $56, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 64
; CHECK-NEXT:    movaps (%rdi), %xmm0
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT:    callq truncf@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT:    callq truncf@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    callq truncf@PLT
; CHECK-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $56, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_trunc_v3f32_var:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT:    vroundss $11, %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vroundss $11, %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT:    retq
entry:
  %b = load <3 x float>, ptr %a
  %trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32(
                              <3 x float> %b,
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %trunc
}
6454
; Strict-FP trunc of a <3 x double> loaded from memory: three trunc libcalls
; under SSE2 (third result returned via x87 fldl); under AVX a vroundpd $11
; for the low pair plus a scalar vroundsd, merged with vinsertf128.
define <3 x double> @constrained_vector_trunc_v3f64_var(ptr %a) #0 {
; CHECK-LABEL: constrained_vector_trunc_v3f64_var:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movaps (%rdi), %xmm0
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT:    callq trunc@PLT
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    callq trunc@PLT
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    callq trunc@PLT
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_trunc_v3f64_var:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vroundpd $11, (%rdi), %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
entry:
  %b = load <3 x double>, ptr %a
  %trunc = call <3 x double> @llvm.experimental.constrained.trunc.v3f64(
                          <3 x double> %b,
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %trunc
}
6498
; Strict signed i32 -> f64 conversion of a single-element vector; lowers to
; one scalar cvtsi2sd from the integer argument register.
define <1 x double> @constrained_vector_sitofp_v1f64_v1i32(<1 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v1f64_v1i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvtsi2sd %edi, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sitofp_v1f64_v1i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvtsi2sd %edi, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <1 x double>
           @llvm.experimental.constrained.sitofp.v1f64.v1i32(<1 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <1 x double> %result
}
6516
; Strict signed i32 -> f32 conversion of a single-element vector; one scalar
; cvtsi2ss.
define <1 x float> @constrained_vector_sitofp_v1f32_v1i32(<1 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v1f32_v1i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvtsi2ss %edi, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sitofp_v1f32_v1i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvtsi2ss %edi, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <1 x float>
           @llvm.experimental.constrained.sitofp.v1f32.v1i32(<1 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <1 x float> %result
}
6534
; Strict signed i64 -> f64 conversion of a single-element vector; one scalar
; cvtsi2sd with a 64-bit source register.
define <1 x double> @constrained_vector_sitofp_v1f64_v1i64(<1 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v1f64_v1i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvtsi2sd %rdi, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sitofp_v1f64_v1i64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvtsi2sd %rdi, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <1 x double>
           @llvm.experimental.constrained.sitofp.v1f64.v1i64(<1 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <1 x double> %result
}
6552
; Strict signed i64 -> f32 conversion of a single-element vector; one scalar
; cvtsi2ss with a 64-bit source register.
define <1 x float> @constrained_vector_sitofp_v1f32_v1i64(<1 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v1f32_v1i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvtsi2ss %rdi, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sitofp_v1f32_v1i64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <1 x float>
           @llvm.experimental.constrained.sitofp.v1f32.v1i64(<1 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <1 x float> %result
}
6570
; Strict signed <2 x i32> -> <2 x double>; maps directly onto the packed
; cvtdq2pd instruction (converts the two low dword lanes).
define <2 x double> @constrained_vector_sitofp_v2f64_v2i32(<2 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v2f64_v2i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvtdq2pd %xmm0, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sitofp_v2f64_v2i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <2 x double>
           @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}
6588
; Strict signed <2 x i32> -> <2 x float>; the upper lanes are zeroed first
; (movq) so the packed cvtdq2ps cannot raise exceptions on garbage lanes.
define <2 x float> @constrained_vector_sitofp_v2f32_v2i32(<2 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v2f32_v2i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT:    cvtdq2ps %xmm0, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sitofp_v2f32_v2i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <2 x float>
           @llvm.experimental.constrained.sitofp.v2f32.v2i32(<2 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <2 x float> %result
}
6608
; Strict signed <2 x i64> -> <2 x double>. Without AVX512DQ there is no
; packed qword conversion, so each lane is extracted to a GPR and converted
; with scalar cvtsi2sd; AVX512DQ widens to zmm and uses vcvtqq2pd.
define <2 x double> @constrained_vector_sitofp_v2f64_v2i64(<2 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v2f64_v2i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    cvtsi2sd %rax, %xmm1
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    cvtsi2sd %rax, %xmm0
; CHECK-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT:    movapd %xmm1, %xmm0
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_sitofp_v2f64_v2i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm1
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm0
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    retq
;
; AVX512F-LABEL: constrained_vector_sitofp_v2f64_v2i64:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm1
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm0
; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT:    retq
;
; AVX512DQ-LABEL: constrained_vector_sitofp_v2f64_v2i64:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT:    vcvtqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
entry:
  %result = call <2 x double>
           @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}
6654
; Strict signed <2 x i64> -> <2 x float>; lane-by-lane scalar cvtsi2ss via
; GPR extraction, then the two floats are re-packed.
define <2 x float> @constrained_vector_sitofp_v2f32_v2i64(<2 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v2f32_v2i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    cvtsi2ss %rax, %xmm1
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    cvtsi2ss %rax, %xmm0
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sitofp_v2f32_v2i64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpextrq $1, %xmm0, %rax
; AVX-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX-NEXT:    retq
entry:
  %result = call <2 x float>
           @llvm.experimental.constrained.sitofp.v2f32.v2i64(<2 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <2 x float> %result
}
6683
; Strict signed <3 x i32> -> <3 x double>; per-lane scalar cvtsi2sd. In the
; SSE configuration the third double is returned on the x87 stack (fldl).
define <3 x double> @constrained_vector_sitofp_v3f64_v3i32(<3 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v3f64_v3i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movd %xmm0, %eax
; CHECK-NEXT:    cvtsi2sd %eax, %xmm2
; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; CHECK-NEXT:    movd %xmm1, %eax
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    cvtsi2sd %eax, %xmm1
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT:    movd %xmm0, %eax
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    cvtsi2sd %eax, %xmm0
; CHECK-NEXT:    movsd %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movapd %xmm2, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sitofp_v3f64_v3i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vextractps $1, %xmm0, %eax
; AVX-NEXT:    vcvtsi2sd %eax, %xmm1, %xmm1
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vcvtsi2sd %eax, %xmm2, %xmm2
; AVX-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX-NEXT:    vpextrd $2, %xmm0, %eax
; AVX-NEXT:    vcvtsi2sd %eax, %xmm3, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
entry:
  %result = call <3 x double>
           @llvm.experimental.constrained.sitofp.v3f64.v3i32(<3 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <3 x double> %result
}
6721
; Strict signed <3 x i32> -> <3 x float>; per-lane scalar cvtsi2ss, then the
; three results are shuffled back into one xmm register.
define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v3f32_v3i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movd %xmm0, %eax
; CHECK-NEXT:    cvtsi2ss %eax, %xmm1
; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
; CHECK-NEXT:    movd %xmm2, %eax
; CHECK-NEXT:    xorps %xmm2, %xmm2
; CHECK-NEXT:    cvtsi2ss %eax, %xmm2
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT:    movd %xmm0, %eax
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    cvtsi2ss %eax, %xmm0
; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sitofp_v3f32_v3i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vextractps $1, %xmm0, %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm1, %xmm1
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm2, %xmm2
; AVX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX-NEXT:    vpextrd $2, %xmm0, %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm3, %xmm0
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT:    retq
entry:
  %result = call <3 x float>
           @llvm.experimental.constrained.sitofp.v3f32.v3i32(<3 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <3 x float> %result
}
6758
; Strict signed <3 x i64> -> <3 x double>. SSE receives the i64 elements in
; GPRs (rdi/rsi/rdx) and converts each scalar; AVX extracts lanes from the
; ymm argument and rebuilds the result with vinsertf128.
define <3 x double> @constrained_vector_sitofp_v3f64_v3i64(<3 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v3f64_v3i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvtsi2sd %rsi, %xmm1
; CHECK-NEXT:    cvtsi2sd %rdi, %xmm0
; CHECK-NEXT:    cvtsi2sd %rdx, %xmm2
; CHECK-NEXT:    movsd %xmm2, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_sitofp_v3f64_v3i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm1
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_sitofp_v3f64_v3i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm1
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
; AVX512-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
; AVX512-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
entry:
  %result = call <3 x double>
           @llvm.experimental.constrained.sitofp.v3f64.v3i64(<3 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <3 x double> %result
}
6802
; Strict signed <3 x i64> -> <3 x float>; scalar cvtsi2ss per lane. The AVX
; paths end with vzeroupper since the ymm input was touched but the result
; is only xmm-sized.
define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v3f32_v3i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvtsi2ss %rsi, %xmm1
; CHECK-NEXT:    cvtsi2ss %rdi, %xmm0
; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    cvtsi2ss %rdx, %xmm1
; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_sitofp_v3f32_v3i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_sitofp_v3f32_v3i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
; AVX512-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
; AVX512-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %result = call <3 x float>
           @llvm.experimental.constrained.sitofp.v3f32.v3i64(<3 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <3 x float> %result
}
6848
; Strict signed <4 x i32> -> <4 x double>; two cvtdq2pd halves under SSE,
; one 256-bit vcvtdq2pd under AVX.
define <4 x double> @constrained_vector_sitofp_v4f64_v4i32(<4 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v4f64_v4i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvtdq2pd %xmm0, %xmm2
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT:    cvtdq2pd %xmm0, %xmm1
; CHECK-NEXT:    movaps %xmm2, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sitofp_v4f64_v4i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
; AVX-NEXT:    retq
entry:
  %result = call <4 x double>
           @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}
6869
; Strict signed <4 x i32> -> <4 x float>; a single packed cvtdq2ps in both
; configurations — the natural full-width case.
define <4 x float> @constrained_vector_sitofp_v4f32_v4i32(<4 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v4f32_v4i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cvtdq2ps %xmm0, %xmm0
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sitofp_v4f32_v4i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %result = call <4 x float>
           @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}
6887
; Strict signed <4 x i64> -> <4 x double>. SSE/AVX1/AVX512F extract all four
; qword lanes to a GPR and convert with scalar cvtsi2sd (no packed qword
; conversion available); AVX512DQ widens to zmm and uses a single vcvtqq2pd.
define <4 x double> @constrained_vector_sitofp_v4f64_v4i64(<4 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_sitofp_v4f64_v4i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    cvtsi2sd %rax, %xmm2
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    cvtsi2sd %rax, %xmm0
; CHECK-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; CHECK-NEXT:    movq %xmm1, %rax
; CHECK-NEXT:    cvtsi2sd %rax, %xmm3
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    cvtsi2sd %rax, %xmm0
; CHECK-NEXT:    unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; CHECK-NEXT:    movapd %xmm2, %xmm0
; CHECK-NEXT:    movapd %xmm3, %xmm1
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_sitofp_v4f64_v4i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
; AVX1-NEXT:    vmovq %xmm1, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX512F-LABEL: constrained_vector_sitofp_v4f64_v4i64:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpextrq $1, %xmm1, %rax
; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
; AVX512F-NEXT:    vmovq %xmm1, %rax
; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512DQ-LABEL: constrained_vector_sitofp_v4f64_v4i64:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT:    vcvtqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    retq
entry:
  %result = call <4 x double>
           @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}
6954
; Strict-FP codegen test: <4 x i64> -> <4 x float> via
; @llvm.experimental.constrained.sitofp (dynamic rounding, strict exceptions),
; checked per target: SSE (CHECK), AVX1, AVX512F (scalar cvtsi2ss per lane),
; AVX512DQ (single vcvtqq2ps). CHECK lines are autogenerated by
; utils/update_llc_test_checks.py -- regenerate rather than hand-edit.
6955define <4 x float> @constrained_vector_sitofp_v4f32_v4i64(<4 x i64> %x) #0 {
6956; CHECK-LABEL: constrained_vector_sitofp_v4f32_v4i64:
6957; CHECK:       # %bb.0: # %entry
6958; CHECK-NEXT:    movq %xmm1, %rax
6959; CHECK-NEXT:    cvtsi2ss %rax, %xmm2
6960; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
6961; CHECK-NEXT:    movq %xmm1, %rax
6962; CHECK-NEXT:    xorps %xmm1, %xmm1
6963; CHECK-NEXT:    cvtsi2ss %rax, %xmm1
6964; CHECK-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
6965; CHECK-NEXT:    movq %xmm0, %rax
6966; CHECK-NEXT:    xorps %xmm1, %xmm1
6967; CHECK-NEXT:    cvtsi2ss %rax, %xmm1
6968; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
6969; CHECK-NEXT:    movq %xmm0, %rax
6970; CHECK-NEXT:    xorps %xmm0, %xmm0
6971; CHECK-NEXT:    cvtsi2ss %rax, %xmm0
6972; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6973; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
6974; CHECK-NEXT:    movaps %xmm1, %xmm0
6975; CHECK-NEXT:    retq
6976;
6977; AVX1-LABEL: constrained_vector_sitofp_v4f32_v4i64:
6978; AVX1:       # %bb.0: # %entry
6979; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
6980; AVX1-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
6981; AVX1-NEXT:    vmovq %xmm0, %rax
6982; AVX1-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
6983; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
6984; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
6985; AVX1-NEXT:    vmovq %xmm0, %rax
6986; AVX1-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
6987; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
6988; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
6989; AVX1-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
6990; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
6991; AVX1-NEXT:    vzeroupper
6992; AVX1-NEXT:    retq
6993;
6994; AVX512F-LABEL: constrained_vector_sitofp_v4f32_v4i64:
6995; AVX512F:       # %bb.0: # %entry
6996; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
6997; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
6998; AVX512F-NEXT:    vmovq %xmm0, %rax
6999; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
7000; AVX512F-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
7001; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
7002; AVX512F-NEXT:    vmovq %xmm0, %rax
7003; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
7004; AVX512F-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
7005; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
7006; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
7007; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
7008; AVX512F-NEXT:    vzeroupper
7009; AVX512F-NEXT:    retq
7010;
7011; AVX512DQ-LABEL: constrained_vector_sitofp_v4f32_v4i64:
7012; AVX512DQ:       # %bb.0: # %entry
7013; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
7014; AVX512DQ-NEXT:    vcvtqq2ps %zmm0, %ymm0
7015; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
7016; AVX512DQ-NEXT:    vzeroupper
7017; AVX512DQ-NEXT:    retq
7018entry:
7019  %result = call <4 x float>
7020           @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64> %x,
7021                                               metadata !"round.dynamic",
7022                                               metadata !"fpexcept.strict") #0
7023  ret <4 x float> %result
7024}
7025
; Strict-FP codegen test: <1 x i32> -> <1 x double> via
; @llvm.experimental.constrained.uitofp. Non-AVX512 targets zero-extend the
; u32 to 64 bits and use the signed cvtsi2sd; AVX512 uses vcvtusi2sd directly.
; CHECK lines are autogenerated (update_llc_test_checks.py) -- regenerate
; rather than hand-edit.
7026define <1 x double> @constrained_vector_uitofp_v1f64_v1i32(<1 x i32> %x) #0 {
7027; CHECK-LABEL: constrained_vector_uitofp_v1f64_v1i32:
7028; CHECK:       # %bb.0: # %entry
7029; CHECK-NEXT:    movl %edi, %eax
7030; CHECK-NEXT:    cvtsi2sd %rax, %xmm0
7031; CHECK-NEXT:    retq
7032;
7033; AVX1-LABEL: constrained_vector_uitofp_v1f64_v1i32:
7034; AVX1:       # %bb.0: # %entry
7035; AVX1-NEXT:    movl %edi, %eax
7036; AVX1-NEXT:    vcvtsi2sd %rax, %xmm0, %xmm0
7037; AVX1-NEXT:    retq
7038;
7039; AVX512-LABEL: constrained_vector_uitofp_v1f64_v1i32:
7040; AVX512:       # %bb.0: # %entry
7041; AVX512-NEXT:    vcvtusi2sd %edi, %xmm0, %xmm0
7042; AVX512-NEXT:    retq
7043entry:
7044  %result = call <1 x double>
7045           @llvm.experimental.constrained.uitofp.v1f64.v1i32(<1 x i32> %x,
7046                                               metadata !"round.dynamic",
7047                                               metadata !"fpexcept.strict") #0
7048  ret <1 x double> %result
7049}
7050
; Strict-FP codegen test: <1 x i32> -> <1 x float> via
; @llvm.experimental.constrained.uitofp. Same widening strategy as the f64
; variant above: zero-extend + signed cvtsi2ss on SSE/AVX1, vcvtusi2ss on
; AVX512. CHECK lines are autogenerated (update_llc_test_checks.py).
7051define <1 x float> @constrained_vector_uitofp_v1f32_v1i32(<1 x i32> %x) #0 {
7052; CHECK-LABEL: constrained_vector_uitofp_v1f32_v1i32:
7053; CHECK:       # %bb.0: # %entry
7054; CHECK-NEXT:    movl %edi, %eax
7055; CHECK-NEXT:    cvtsi2ss %rax, %xmm0
7056; CHECK-NEXT:    retq
7057;
7058; AVX1-LABEL: constrained_vector_uitofp_v1f32_v1i32:
7059; AVX1:       # %bb.0: # %entry
7060; AVX1-NEXT:    movl %edi, %eax
7061; AVX1-NEXT:    vcvtsi2ss %rax, %xmm0, %xmm0
7062; AVX1-NEXT:    retq
7063;
7064; AVX512-LABEL: constrained_vector_uitofp_v1f32_v1i32:
7065; AVX512:       # %bb.0: # %entry
7066; AVX512-NEXT:    vcvtusi2ss %edi, %xmm0, %xmm0
7067; AVX512-NEXT:    retq
7068entry:
7069  %result = call <1 x float>
7070           @llvm.experimental.constrained.uitofp.v1f32.v1i32(<1 x i32> %x,
7071                                               metadata !"round.dynamic",
7072                                               metadata !"fpexcept.strict") #0
7073  ret <1 x float> %result
7074}
7075
; Strict-FP codegen test: <1 x i64> -> <1 x double> via
; @llvm.experimental.constrained.uitofp. Without native u64 conversion,
; SSE/AVX1 use the shift/or halving trick (convert value/2 with the low bit
; or'ed in, then double the result) when the sign bit is set; AVX512 emits a
; single vcvtusi2sd. CHECK lines are autogenerated (update_llc_test_checks.py).
7076define <1 x double> @constrained_vector_uitofp_v1f64_v1i64(<1 x i64> %x) #0 {
7077; CHECK-LABEL: constrained_vector_uitofp_v1f64_v1i64:
7078; CHECK:       # %bb.0: # %entry
7079; CHECK-NEXT:    movq %rdi, %rax
7080; CHECK-NEXT:    shrq %rax
7081; CHECK-NEXT:    movl %edi, %ecx
7082; CHECK-NEXT:    andl $1, %ecx
7083; CHECK-NEXT:    orq %rax, %rcx
7084; CHECK-NEXT:    testq %rdi, %rdi
7085; CHECK-NEXT:    cmovnsq %rdi, %rcx
7086; CHECK-NEXT:    cvtsi2sd %rcx, %xmm0
7087; CHECK-NEXT:    jns .LBB175_2
7088; CHECK-NEXT:  # %bb.1:
7089; CHECK-NEXT:    addsd %xmm0, %xmm0
7090; CHECK-NEXT:  .LBB175_2: # %entry
7091; CHECK-NEXT:    retq
7092;
7093; AVX1-LABEL: constrained_vector_uitofp_v1f64_v1i64:
7094; AVX1:       # %bb.0: # %entry
7095; AVX1-NEXT:    movq %rdi, %rax
7096; AVX1-NEXT:    shrq %rax
7097; AVX1-NEXT:    movl %edi, %ecx
7098; AVX1-NEXT:    andl $1, %ecx
7099; AVX1-NEXT:    orq %rax, %rcx
7100; AVX1-NEXT:    testq %rdi, %rdi
7101; AVX1-NEXT:    cmovnsq %rdi, %rcx
7102; AVX1-NEXT:    vcvtsi2sd %rcx, %xmm0, %xmm0
7103; AVX1-NEXT:    jns .LBB175_2
7104; AVX1-NEXT:  # %bb.1:
7105; AVX1-NEXT:    vaddsd %xmm0, %xmm0, %xmm0
7106; AVX1-NEXT:  .LBB175_2: # %entry
7107; AVX1-NEXT:    retq
7108;
7109; AVX512-LABEL: constrained_vector_uitofp_v1f64_v1i64:
7110; AVX512:       # %bb.0: # %entry
7111; AVX512-NEXT:    vcvtusi2sd %rdi, %xmm0, %xmm0
7112; AVX512-NEXT:    retq
7113entry:
7114  %result = call <1 x double>
7115           @llvm.experimental.constrained.uitofp.v1f64.v1i64(<1 x i64> %x,
7116                                               metadata !"round.dynamic",
7117                                               metadata !"fpexcept.strict") #0
7118  ret <1 x double> %result
7119}
7120
; Strict-FP codegen test: <1 x i64> -> <1 x float> via
; @llvm.experimental.constrained.uitofp. Same shift/or halving sequence as
; the f64 variant (cvtsi2ss/addss flavor) on SSE/AVX1; AVX512 uses
; vcvtusi2ss. CHECK lines are autogenerated (update_llc_test_checks.py).
7121define <1 x float> @constrained_vector_uitofp_v1f32_v1i64(<1 x i64> %x) #0 {
7122; CHECK-LABEL: constrained_vector_uitofp_v1f32_v1i64:
7123; CHECK:       # %bb.0: # %entry
7124; CHECK-NEXT:    movq %rdi, %rax
7125; CHECK-NEXT:    shrq %rax
7126; CHECK-NEXT:    movl %edi, %ecx
7127; CHECK-NEXT:    andl $1, %ecx
7128; CHECK-NEXT:    orq %rax, %rcx
7129; CHECK-NEXT:    testq %rdi, %rdi
7130; CHECK-NEXT:    cmovnsq %rdi, %rcx
7131; CHECK-NEXT:    cvtsi2ss %rcx, %xmm0
7132; CHECK-NEXT:    jns .LBB176_2
7133; CHECK-NEXT:  # %bb.1:
7134; CHECK-NEXT:    addss %xmm0, %xmm0
7135; CHECK-NEXT:  .LBB176_2: # %entry
7136; CHECK-NEXT:    retq
7137;
7138; AVX1-LABEL: constrained_vector_uitofp_v1f32_v1i64:
7139; AVX1:       # %bb.0: # %entry
7140; AVX1-NEXT:    movq %rdi, %rax
7141; AVX1-NEXT:    shrq %rax
7142; AVX1-NEXT:    movl %edi, %ecx
7143; AVX1-NEXT:    andl $1, %ecx
7144; AVX1-NEXT:    orq %rax, %rcx
7145; AVX1-NEXT:    testq %rdi, %rdi
7146; AVX1-NEXT:    cmovnsq %rdi, %rcx
7147; AVX1-NEXT:    vcvtsi2ss %rcx, %xmm0, %xmm0
7148; AVX1-NEXT:    jns .LBB176_2
7149; AVX1-NEXT:  # %bb.1:
7150; AVX1-NEXT:    vaddss %xmm0, %xmm0, %xmm0
7151; AVX1-NEXT:  .LBB176_2: # %entry
7152; AVX1-NEXT:    retq
7153;
7154; AVX512-LABEL: constrained_vector_uitofp_v1f32_v1i64:
7155; AVX512:       # %bb.0: # %entry
7156; AVX512-NEXT:    vcvtusi2ss %rdi, %xmm0, %xmm0
7157; AVX512-NEXT:    retq
7158entry:
7159  %result = call <1 x float>
7160           @llvm.experimental.constrained.uitofp.v1f32.v1i64(<1 x i64> %x,
7161                                               metadata !"round.dynamic",
7162                                               metadata !"fpexcept.strict") #0
7163  ret <1 x float> %result
7164}
7165
; Strict-FP codegen test: <2 x i32> -> <2 x double> via
; @llvm.experimental.constrained.uitofp. SSE/AVX1 use the exponent-bias
; trick: zero-extend each u32 into a double's mantissa, OR in the constant
; 2^52 (4.503599627370496E+15), then subtract it; AVX512 uses vcvtudq2pd.
; CHECK lines are autogenerated (update_llc_test_checks.py).
7166define <2 x double> @constrained_vector_uitofp_v2f64_v2i32(<2 x i32> %x) #0 {
7167; CHECK-LABEL: constrained_vector_uitofp_v2f64_v2i32:
7168; CHECK:       # %bb.0: # %entry
7169; CHECK-NEXT:    xorpd %xmm1, %xmm1
7170; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
7171; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
7172; CHECK-NEXT:    orpd %xmm1, %xmm0
7173; CHECK-NEXT:    subpd %xmm1, %xmm0
7174; CHECK-NEXT:    retq
7175;
7176; AVX1-LABEL: constrained_vector_uitofp_v2f64_v2i32:
7177; AVX1:       # %bb.0: # %entry
7178; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7179; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
7180; AVX1-NEXT:    # xmm1 = mem[0,0]
7181; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
7182; AVX1-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
7183; AVX1-NEXT:    retq
7184;
7185; AVX512-LABEL: constrained_vector_uitofp_v2f64_v2i32:
7186; AVX512:       # %bb.0: # %entry
7187; AVX512-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
7188; AVX512-NEXT:    vcvtudq2pd %ymm0, %zmm0
7189; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
7190; AVX512-NEXT:    vzeroupper
7191; AVX512-NEXT:    retq
7192entry:
7193  %result = call <2 x double>
7194           @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32> %x,
7195                                               metadata !"round.dynamic",
7196                                               metadata !"fpexcept.strict") #0
7197  ret <2 x double> %result
7198}
7199
; Strict-FP codegen test: <2 x i32> -> <2 x float> via
; @llvm.experimental.constrained.uitofp. SSE/AVX1 convert exactly to f64
; first (same 2^52 OR/subtract trick as the v2f64 test above), then round
; once with cvtpd2ps; AVX512 uses vcvtudq2ps. CHECK lines are autogenerated
; (update_llc_test_checks.py).
7200define <2 x float> @constrained_vector_uitofp_v2f32_v2i32(<2 x i32> %x) #0 {
7201; CHECK-LABEL: constrained_vector_uitofp_v2f32_v2i32:
7202; CHECK:       # %bb.0: # %entry
7203; CHECK-NEXT:    xorpd %xmm1, %xmm1
7204; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
7205; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
7206; CHECK-NEXT:    orpd %xmm1, %xmm0
7207; CHECK-NEXT:    subpd %xmm1, %xmm0
7208; CHECK-NEXT:    cvtpd2ps %xmm0, %xmm0
7209; CHECK-NEXT:    retq
7210;
7211; AVX1-LABEL: constrained_vector_uitofp_v2f32_v2i32:
7212; AVX1:       # %bb.0: # %entry
7213; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7214; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
7215; AVX1-NEXT:    # xmm1 = mem[0,0]
7216; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
7217; AVX1-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
7218; AVX1-NEXT:    vcvtpd2ps %xmm0, %xmm0
7219; AVX1-NEXT:    retq
7220;
7221; AVX512-LABEL: constrained_vector_uitofp_v2f32_v2i32:
7222; AVX512:       # %bb.0: # %entry
7223; AVX512-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
7224; AVX512-NEXT:    vcvtudq2ps %zmm0, %zmm0
7225; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
7226; AVX512-NEXT:    vzeroupper
7227; AVX512-NEXT:    retq
7228entry:
7229  %result = call <2 x float>
7230           @llvm.experimental.constrained.uitofp.v2f32.v2i32(<2 x i32> %x,
7231                                               metadata !"round.dynamic",
7232                                               metadata !"fpexcept.strict") #0
7233  ret <2 x float> %result
7234}
7235
; Strict-FP codegen test: <2 x i64> -> <2 x double> via
; @llvm.experimental.constrained.uitofp. SSE/AVX1 scalarize and apply the
; shift/or halving trick per lane; AVX512F uses scalar vcvtusi2sd per lane;
; AVX512DQ uses packed vcvtuqq2pd. CHECK lines are autogenerated
; (update_llc_test_checks.py) -- regenerate rather than hand-edit.
7236define <2 x double> @constrained_vector_uitofp_v2f64_v2i64(<2 x i64> %x) #0 {
7237; CHECK-LABEL: constrained_vector_uitofp_v2f64_v2i64:
7238; CHECK:       # %bb.0: # %entry
7239; CHECK-NEXT:    movdqa %xmm0, %xmm1
7240; CHECK-NEXT:    movq %xmm0, %rax
7241; CHECK-NEXT:    movq %rax, %rcx
7242; CHECK-NEXT:    shrq %rcx
7243; CHECK-NEXT:    movl %eax, %edx
7244; CHECK-NEXT:    andl $1, %edx
7245; CHECK-NEXT:    orq %rcx, %rdx
7246; CHECK-NEXT:    testq %rax, %rax
7247; CHECK-NEXT:    cmovnsq %rax, %rdx
7248; CHECK-NEXT:    xorps %xmm0, %xmm0
7249; CHECK-NEXT:    cvtsi2sd %rdx, %xmm0
7250; CHECK-NEXT:    jns .LBB179_2
7251; CHECK-NEXT:  # %bb.1:
7252; CHECK-NEXT:    addsd %xmm0, %xmm0
7253; CHECK-NEXT:  .LBB179_2: # %entry
7254; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
7255; CHECK-NEXT:    movq %xmm1, %rax
7256; CHECK-NEXT:    movq %rax, %rcx
7257; CHECK-NEXT:    shrq %rcx
7258; CHECK-NEXT:    movl %eax, %edx
7259; CHECK-NEXT:    andl $1, %edx
7260; CHECK-NEXT:    orq %rcx, %rdx
7261; CHECK-NEXT:    testq %rax, %rax
7262; CHECK-NEXT:    cmovnsq %rax, %rdx
7263; CHECK-NEXT:    xorps %xmm1, %xmm1
7264; CHECK-NEXT:    cvtsi2sd %rdx, %xmm1
7265; CHECK-NEXT:    jns .LBB179_4
7266; CHECK-NEXT:  # %bb.3:
7267; CHECK-NEXT:    addsd %xmm1, %xmm1
7268; CHECK-NEXT:  .LBB179_4: # %entry
7269; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
7270; CHECK-NEXT:    retq
7271;
7272; AVX1-LABEL: constrained_vector_uitofp_v2f64_v2i64:
7273; AVX1:       # %bb.0: # %entry
7274; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
7275; AVX1-NEXT:    movq %rax, %rcx
7276; AVX1-NEXT:    shrq %rcx
7277; AVX1-NEXT:    movl %eax, %edx
7278; AVX1-NEXT:    andl $1, %edx
7279; AVX1-NEXT:    orq %rcx, %rdx
7280; AVX1-NEXT:    testq %rax, %rax
7281; AVX1-NEXT:    cmovnsq %rax, %rdx
7282; AVX1-NEXT:    vcvtsi2sd %rdx, %xmm1, %xmm1
7283; AVX1-NEXT:    jns .LBB179_2
7284; AVX1-NEXT:  # %bb.1:
7285; AVX1-NEXT:    vaddsd %xmm1, %xmm1, %xmm1
7286; AVX1-NEXT:  .LBB179_2: # %entry
7287; AVX1-NEXT:    vmovq %xmm0, %rax
7288; AVX1-NEXT:    movq %rax, %rcx
7289; AVX1-NEXT:    shrq %rcx
7290; AVX1-NEXT:    movl %eax, %edx
7291; AVX1-NEXT:    andl $1, %edx
7292; AVX1-NEXT:    orq %rcx, %rdx
7293; AVX1-NEXT:    testq %rax, %rax
7294; AVX1-NEXT:    cmovnsq %rax, %rdx
7295; AVX1-NEXT:    vcvtsi2sd %rdx, %xmm2, %xmm0
7296; AVX1-NEXT:    jns .LBB179_4
7297; AVX1-NEXT:  # %bb.3:
7298; AVX1-NEXT:    vaddsd %xmm0, %xmm0, %xmm0
7299; AVX1-NEXT:  .LBB179_4: # %entry
7300; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
7301; AVX1-NEXT:    retq
7302;
7303; AVX512F-LABEL: constrained_vector_uitofp_v2f64_v2i64:
7304; AVX512F:       # %bb.0: # %entry
7305; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
7306; AVX512F-NEXT:    vcvtusi2sd %rax, %xmm1, %xmm1
7307; AVX512F-NEXT:    vmovq %xmm0, %rax
7308; AVX512F-NEXT:    vcvtusi2sd %rax, %xmm2, %xmm0
7309; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
7310; AVX512F-NEXT:    retq
7311;
7312; AVX512DQ-LABEL: constrained_vector_uitofp_v2f64_v2i64:
7313; AVX512DQ:       # %bb.0: # %entry
7314; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
7315; AVX512DQ-NEXT:    vcvtuqq2pd %zmm0, %zmm0
7316; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
7317; AVX512DQ-NEXT:    vzeroupper
7318; AVX512DQ-NEXT:    retq
7319entry:
7320  %result = call <2 x double>
7321           @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %x,
7322                                               metadata !"round.dynamic",
7323                                               metadata !"fpexcept.strict") #0
7324  ret <2 x double> %result
7325}
7326
; Strict-FP codegen test: <2 x i64> -> <2 x float> via
; @llvm.experimental.constrained.uitofp. SSE scalarizes with the shift/or
; halving trick; AVX1 does the halving in vector form (psrlq/pand/por plus
; blendv on the sign) before scalar cvtsi2ss; AVX512 uses vcvtusi2ss per
; lane. CHECK lines are autogenerated (update_llc_test_checks.py).
7327define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 {
7328; CHECK-LABEL: constrained_vector_uitofp_v2f32_v2i64:
7329; CHECK:       # %bb.0: # %entry
7330; CHECK-NEXT:    movdqa %xmm0, %xmm1
7331; CHECK-NEXT:    movq %xmm0, %rax
7332; CHECK-NEXT:    movq %rax, %rcx
7333; CHECK-NEXT:    shrq %rcx
7334; CHECK-NEXT:    movl %eax, %edx
7335; CHECK-NEXT:    andl $1, %edx
7336; CHECK-NEXT:    orq %rcx, %rdx
7337; CHECK-NEXT:    testq %rax, %rax
7338; CHECK-NEXT:    cmovnsq %rax, %rdx
7339; CHECK-NEXT:    xorps %xmm0, %xmm0
7340; CHECK-NEXT:    cvtsi2ss %rdx, %xmm0
7341; CHECK-NEXT:    jns .LBB180_2
7342; CHECK-NEXT:  # %bb.1:
7343; CHECK-NEXT:    addss %xmm0, %xmm0
7344; CHECK-NEXT:  .LBB180_2: # %entry
7345; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
7346; CHECK-NEXT:    movq %xmm1, %rax
7347; CHECK-NEXT:    movq %rax, %rcx
7348; CHECK-NEXT:    shrq %rcx
7349; CHECK-NEXT:    movl %eax, %edx
7350; CHECK-NEXT:    andl $1, %edx
7351; CHECK-NEXT:    orq %rcx, %rdx
7352; CHECK-NEXT:    testq %rax, %rax
7353; CHECK-NEXT:    cmovnsq %rax, %rdx
7354; CHECK-NEXT:    xorps %xmm1, %xmm1
7355; CHECK-NEXT:    cvtsi2ss %rdx, %xmm1
7356; CHECK-NEXT:    jns .LBB180_4
7357; CHECK-NEXT:  # %bb.3:
7358; CHECK-NEXT:    addss %xmm1, %xmm1
7359; CHECK-NEXT:  .LBB180_4: # %entry
7360; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
7361; CHECK-NEXT:    retq
7362;
7363; AVX1-LABEL: constrained_vector_uitofp_v2f32_v2i64:
7364; AVX1:       # %bb.0: # %entry
7365; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
7366; AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm2
7367; AVX1-NEXT:    vpor %xmm1, %xmm2, %xmm1
7368; AVX1-NEXT:    vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
7369; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
7370; AVX1-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
7371; AVX1-NEXT:    vmovq %xmm1, %rax
7372; AVX1-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm1
7373; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
7374; AVX1-NEXT:    vaddps %xmm1, %xmm1, %xmm2
7375; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
7376; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
7377; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
7378; AVX1-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
7379; AVX1-NEXT:    retq
7380;
7381; AVX512-LABEL: constrained_vector_uitofp_v2f32_v2i64:
7382; AVX512:       # %bb.0: # %entry
7383; AVX512-NEXT:    vpextrq $1, %xmm0, %rax
7384; AVX512-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
7385; AVX512-NEXT:    vmovq %xmm0, %rax
7386; AVX512-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
7387; AVX512-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
7388; AVX512-NEXT:    retq
7389entry:
7390  %result = call <2 x float>
7391           @llvm.experimental.constrained.uitofp.v2f32.v2i64(<2 x i64> %x,
7392                                               metadata !"round.dynamic",
7393                                               metadata !"fpexcept.strict") #0
7394  ret <2 x float> %result
7395}
7396
; Strict-FP codegen test: odd-width <3 x i32> -> <3 x double> via
; @llvm.experimental.constrained.uitofp. SSE returns the third element on
; the x87 stack (movsd to stack + fldl, per the <3 x double> return ABI);
; AVX builds a ymm with vinsertf128. AVX512 converts each u32 with
; vcvtusi2sd. CHECK lines are autogenerated (update_llc_test_checks.py).
7397define <3 x double> @constrained_vector_uitofp_v3f64_v3i32(<3 x i32> %x) #0 {
7398; CHECK-LABEL: constrained_vector_uitofp_v3f64_v3i32:
7399; CHECK:       # %bb.0: # %entry
7400; CHECK-NEXT:    movd %xmm0, %eax
7401; CHECK-NEXT:    cvtsi2sd %rax, %xmm2
7402; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
7403; CHECK-NEXT:    movd %xmm1, %eax
7404; CHECK-NEXT:    xorps %xmm1, %xmm1
7405; CHECK-NEXT:    cvtsi2sd %rax, %xmm1
7406; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
7407; CHECK-NEXT:    movd %xmm0, %eax
7408; CHECK-NEXT:    xorps %xmm0, %xmm0
7409; CHECK-NEXT:    cvtsi2sd %rax, %xmm0
7410; CHECK-NEXT:    movsd %xmm0, -{{[0-9]+}}(%rsp)
7411; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
7412; CHECK-NEXT:    wait
7413; CHECK-NEXT:    movapd %xmm2, %xmm0
7414; CHECK-NEXT:    retq
7415;
7416; AVX1-LABEL: constrained_vector_uitofp_v3f64_v3i32:
7417; AVX1:       # %bb.0: # %entry
7418; AVX1-NEXT:    vextractps $1, %xmm0, %eax
7419; AVX1-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm1
7420; AVX1-NEXT:    vmovd %xmm0, %eax
7421; AVX1-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
7422; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
7423; AVX1-NEXT:    vpextrd $2, %xmm0, %eax
7424; AVX1-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
7425; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
7426; AVX1-NEXT:    retq
7427;
7428; AVX512-LABEL: constrained_vector_uitofp_v3f64_v3i32:
7429; AVX512:       # %bb.0: # %entry
7430; AVX512-NEXT:    vextractps $1, %xmm0, %eax
7431; AVX512-NEXT:    vcvtusi2sd %eax, %xmm1, %xmm1
7432; AVX512-NEXT:    vmovd %xmm0, %eax
7433; AVX512-NEXT:    vcvtusi2sd %eax, %xmm2, %xmm2
7434; AVX512-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
7435; AVX512-NEXT:    vpextrd $2, %xmm0, %eax
7436; AVX512-NEXT:    vcvtusi2sd %eax, %xmm3, %xmm0
7437; AVX512-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
7438; AVX512-NEXT:    retq
7439entry:
7440  %result = call <3 x double>
7441           @llvm.experimental.constrained.uitofp.v3f64.v3i32(<3 x i32> %x,
7442                                               metadata !"round.dynamic",
7443                                               metadata !"fpexcept.strict") #0
7444  ret <3 x double> %result
7445}
7446
; Strict-FP codegen test: odd-width <3 x i32> -> <3 x float> via
; @llvm.experimental.constrained.uitofp. Elements are converted one at a
; time (widened via zero-extend + signed cvtsi2ss on SSE/AVX1, vcvtusi2ss
; on AVX512) and reassembled with unpck/insertps. CHECK lines are
; autogenerated (update_llc_test_checks.py).
7447define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 {
7448; CHECK-LABEL: constrained_vector_uitofp_v3f32_v3i32:
7449; CHECK:       # %bb.0: # %entry
7450; CHECK-NEXT:    movd %xmm0, %eax
7451; CHECK-NEXT:    cvtsi2ss %rax, %xmm1
7452; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
7453; CHECK-NEXT:    movd %xmm2, %eax
7454; CHECK-NEXT:    xorps %xmm2, %xmm2
7455; CHECK-NEXT:    cvtsi2ss %rax, %xmm2
7456; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
7457; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
7458; CHECK-NEXT:    movd %xmm0, %eax
7459; CHECK-NEXT:    xorps %xmm0, %xmm0
7460; CHECK-NEXT:    cvtsi2ss %rax, %xmm0
7461; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
7462; CHECK-NEXT:    movaps %xmm1, %xmm0
7463; CHECK-NEXT:    retq
7464;
7465; AVX1-LABEL: constrained_vector_uitofp_v3f32_v3i32:
7466; AVX1:       # %bb.0: # %entry
7467; AVX1-NEXT:    vextractps $1, %xmm0, %eax
7468; AVX1-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
7469; AVX1-NEXT:    vmovd %xmm0, %eax
7470; AVX1-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
7471; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
7472; AVX1-NEXT:    vpextrd $2, %xmm0, %eax
7473; AVX1-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
7474; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
7475; AVX1-NEXT:    retq
7476;
7477; AVX512-LABEL: constrained_vector_uitofp_v3f32_v3i32:
7478; AVX512:       # %bb.0: # %entry
7479; AVX512-NEXT:    vextractps $1, %xmm0, %eax
7480; AVX512-NEXT:    vcvtusi2ss %eax, %xmm1, %xmm1
7481; AVX512-NEXT:    vmovd %xmm0, %eax
7482; AVX512-NEXT:    vcvtusi2ss %eax, %xmm2, %xmm2
7483; AVX512-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
7484; AVX512-NEXT:    vpextrd $2, %xmm0, %eax
7485; AVX512-NEXT:    vcvtusi2ss %eax, %xmm3, %xmm0
7486; AVX512-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
7487; AVX512-NEXT:    retq
7488entry:
7489  %result = call <3 x float>
7490           @llvm.experimental.constrained.uitofp.v3f32.v3i32(<3 x i32> %x,
7491                                               metadata !"round.dynamic",
7492                                               metadata !"fpexcept.strict") #0
7493  ret <3 x float> %result
7494}
7495
; Strict-FP codegen test: odd-width <3 x i64> -> <3 x double> via
; @llvm.experimental.constrained.uitofp. SSE takes the three i64s in
; rdi/rsi/rdx, applies the shift/or halving trick per element, and returns
; the third element via the x87 stack; AVX1 scalarizes out of ymm0 with the
; same trick; AVX512 uses vcvtusi2sd per lane. CHECK lines are autogenerated
; (update_llc_test_checks.py) -- regenerate rather than hand-edit.
7496define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
7497; CHECK-LABEL: constrained_vector_uitofp_v3f64_v3i64:
7498; CHECK:       # %bb.0: # %entry
7499; CHECK-NEXT:    movq %rdi, %rax
7500; CHECK-NEXT:    shrq %rax
7501; CHECK-NEXT:    movl %edi, %ecx
7502; CHECK-NEXT:    andl $1, %ecx
7503; CHECK-NEXT:    orq %rax, %rcx
7504; CHECK-NEXT:    testq %rdi, %rdi
7505; CHECK-NEXT:    cmovnsq %rdi, %rcx
7506; CHECK-NEXT:    cvtsi2sd %rcx, %xmm0
7507; CHECK-NEXT:    jns .LBB183_2
7508; CHECK-NEXT:  # %bb.1:
7509; CHECK-NEXT:    addsd %xmm0, %xmm0
7510; CHECK-NEXT:  .LBB183_2: # %entry
7511; CHECK-NEXT:    movq %rsi, %rax
7512; CHECK-NEXT:    shrq %rax
7513; CHECK-NEXT:    movl %esi, %ecx
7514; CHECK-NEXT:    andl $1, %ecx
7515; CHECK-NEXT:    orq %rax, %rcx
7516; CHECK-NEXT:    testq %rsi, %rsi
7517; CHECK-NEXT:    cmovnsq %rsi, %rcx
7518; CHECK-NEXT:    cvtsi2sd %rcx, %xmm1
7519; CHECK-NEXT:    jns .LBB183_4
7520; CHECK-NEXT:  # %bb.3:
7521; CHECK-NEXT:    addsd %xmm1, %xmm1
7522; CHECK-NEXT:  .LBB183_4: # %entry
7523; CHECK-NEXT:    movq %rdx, %rax
7524; CHECK-NEXT:    shrq %rax
7525; CHECK-NEXT:    movl %edx, %ecx
7526; CHECK-NEXT:    andl $1, %ecx
7527; CHECK-NEXT:    orq %rax, %rcx
7528; CHECK-NEXT:    testq %rdx, %rdx
7529; CHECK-NEXT:    cmovnsq %rdx, %rcx
7530; CHECK-NEXT:    cvtsi2sd %rcx, %xmm2
7531; CHECK-NEXT:    jns .LBB183_6
7532; CHECK-NEXT:  # %bb.5:
7533; CHECK-NEXT:    addsd %xmm2, %xmm2
7534; CHECK-NEXT:  .LBB183_6: # %entry
7535; CHECK-NEXT:    movsd %xmm2, -{{[0-9]+}}(%rsp)
7536; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
7537; CHECK-NEXT:    wait
7538; CHECK-NEXT:    retq
7539;
7540; AVX1-LABEL: constrained_vector_uitofp_v3f64_v3i64:
7541; AVX1:       # %bb.0: # %entry
7542; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
7543; AVX1-NEXT:    movq %rax, %rcx
7544; AVX1-NEXT:    shrq %rcx
7545; AVX1-NEXT:    movl %eax, %edx
7546; AVX1-NEXT:    andl $1, %edx
7547; AVX1-NEXT:    orq %rcx, %rdx
7548; AVX1-NEXT:    testq %rax, %rax
7549; AVX1-NEXT:    cmovnsq %rax, %rdx
7550; AVX1-NEXT:    vcvtsi2sd %rdx, %xmm1, %xmm1
7551; AVX1-NEXT:    jns .LBB183_2
7552; AVX1-NEXT:  # %bb.1:
7553; AVX1-NEXT:    vaddsd %xmm1, %xmm1, %xmm1
7554; AVX1-NEXT:  .LBB183_2: # %entry
7555; AVX1-NEXT:    vmovq %xmm0, %rax
7556; AVX1-NEXT:    movq %rax, %rcx
7557; AVX1-NEXT:    shrq %rcx
7558; AVX1-NEXT:    movl %eax, %edx
7559; AVX1-NEXT:    andl $1, %edx
7560; AVX1-NEXT:    orq %rcx, %rdx
7561; AVX1-NEXT:    testq %rax, %rax
7562; AVX1-NEXT:    cmovnsq %rax, %rdx
7563; AVX1-NEXT:    vcvtsi2sd %rdx, %xmm2, %xmm2
7564; AVX1-NEXT:    jns .LBB183_4
7565; AVX1-NEXT:  # %bb.3:
7566; AVX1-NEXT:    vaddsd %xmm2, %xmm2, %xmm2
7567; AVX1-NEXT:  .LBB183_4: # %entry
7568; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
7569; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
7570; AVX1-NEXT:    vmovq %xmm0, %rax
7571; AVX1-NEXT:    movq %rax, %rcx
7572; AVX1-NEXT:    shrq %rcx
7573; AVX1-NEXT:    movl %eax, %edx
7574; AVX1-NEXT:    andl $1, %edx
7575; AVX1-NEXT:    orq %rcx, %rdx
7576; AVX1-NEXT:    testq %rax, %rax
7577; AVX1-NEXT:    cmovnsq %rax, %rdx
7578; AVX1-NEXT:    vcvtsi2sd %rdx, %xmm3, %xmm0
7579; AVX1-NEXT:    jns .LBB183_6
7580; AVX1-NEXT:  # %bb.5:
7581; AVX1-NEXT:    vaddsd %xmm0, %xmm0, %xmm0
7582; AVX1-NEXT:  .LBB183_6: # %entry
7583; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
7584; AVX1-NEXT:    retq
7585;
7586; AVX512-LABEL: constrained_vector_uitofp_v3f64_v3i64:
7587; AVX512:       # %bb.0: # %entry
7588; AVX512-NEXT:    vpextrq $1, %xmm0, %rax
7589; AVX512-NEXT:    vcvtusi2sd %rax, %xmm1, %xmm1
7590; AVX512-NEXT:    vmovq %xmm0, %rax
7591; AVX512-NEXT:    vcvtusi2sd %rax, %xmm2, %xmm2
7592; AVX512-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
7593; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
7594; AVX512-NEXT:    vmovq %xmm0, %rax
7595; AVX512-NEXT:    vcvtusi2sd %rax, %xmm3, %xmm0
7596; AVX512-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
7597; AVX512-NEXT:    retq
7598entry:
7599  %result = call <3 x double>
7600           @llvm.experimental.constrained.uitofp.v3f64.v3i64(<3 x i64> %x,
7601                                               metadata !"round.dynamic",
7602                                               metadata !"fpexcept.strict") #0
7603  ret <3 x double> %result
7604}
7605
; Strict-FP codegen test: odd-width <3 x i64> -> <3 x float> via
; @llvm.experimental.constrained.uitofp. SSE takes the i64s in rdi/rsi/rdx
; and applies the shift/or halving trick per element; AVX1 scalarizes out
; of ymm0 the same way (vzeroupper before returning in xmm0); AVX512 uses
; vcvtusi2ss per lane. CHECK lines are autogenerated
; (update_llc_test_checks.py) -- regenerate rather than hand-edit.
7606define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
7607; CHECK-LABEL: constrained_vector_uitofp_v3f32_v3i64:
7608; CHECK:       # %bb.0: # %entry
7609; CHECK-NEXT:    movq %rsi, %rax
7610; CHECK-NEXT:    shrq %rax
7611; CHECK-NEXT:    movl %esi, %ecx
7612; CHECK-NEXT:    andl $1, %ecx
7613; CHECK-NEXT:    orq %rax, %rcx
7614; CHECK-NEXT:    testq %rsi, %rsi
7615; CHECK-NEXT:    cmovnsq %rsi, %rcx
7616; CHECK-NEXT:    cvtsi2ss %rcx, %xmm1
7617; CHECK-NEXT:    jns .LBB184_2
7618; CHECK-NEXT:  # %bb.1:
7619; CHECK-NEXT:    addss %xmm1, %xmm1
7620; CHECK-NEXT:  .LBB184_2: # %entry
7621; CHECK-NEXT:    movq %rdi, %rax
7622; CHECK-NEXT:    shrq %rax
7623; CHECK-NEXT:    movl %edi, %ecx
7624; CHECK-NEXT:    andl $1, %ecx
7625; CHECK-NEXT:    orq %rax, %rcx
7626; CHECK-NEXT:    testq %rdi, %rdi
7627; CHECK-NEXT:    cmovnsq %rdi, %rcx
7628; CHECK-NEXT:    cvtsi2ss %rcx, %xmm0
7629; CHECK-NEXT:    jns .LBB184_4
7630; CHECK-NEXT:  # %bb.3:
7631; CHECK-NEXT:    addss %xmm0, %xmm0
7632; CHECK-NEXT:  .LBB184_4: # %entry
7633; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
7634; CHECK-NEXT:    movq %rdx, %rax
7635; CHECK-NEXT:    shrq %rax
7636; CHECK-NEXT:    movl %edx, %ecx
7637; CHECK-NEXT:    andl $1, %ecx
7638; CHECK-NEXT:    orq %rax, %rcx
7639; CHECK-NEXT:    testq %rdx, %rdx
7640; CHECK-NEXT:    cmovnsq %rdx, %rcx
7641; CHECK-NEXT:    xorps %xmm1, %xmm1
7642; CHECK-NEXT:    cvtsi2ss %rcx, %xmm1
7643; CHECK-NEXT:    jns .LBB184_6
7644; CHECK-NEXT:  # %bb.5:
7645; CHECK-NEXT:    addss %xmm1, %xmm1
7646; CHECK-NEXT:  .LBB184_6: # %entry
7647; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
7648; CHECK-NEXT:    retq
7649;
7650; AVX1-LABEL: constrained_vector_uitofp_v3f32_v3i64:
7651; AVX1:       # %bb.0: # %entry
7652; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
7653; AVX1-NEXT:    movq %rax, %rcx
7654; AVX1-NEXT:    shrq %rcx
7655; AVX1-NEXT:    movl %eax, %edx
7656; AVX1-NEXT:    andl $1, %edx
7657; AVX1-NEXT:    orq %rcx, %rdx
7658; AVX1-NEXT:    testq %rax, %rax
7659; AVX1-NEXT:    cmovnsq %rax, %rdx
7660; AVX1-NEXT:    vcvtsi2ss %rdx, %xmm1, %xmm1
7661; AVX1-NEXT:    jns .LBB184_2
7662; AVX1-NEXT:  # %bb.1:
7663; AVX1-NEXT:    vaddss %xmm1, %xmm1, %xmm1
7664; AVX1-NEXT:  .LBB184_2: # %entry
7665; AVX1-NEXT:    vmovq %xmm0, %rax
7666; AVX1-NEXT:    movq %rax, %rcx
7667; AVX1-NEXT:    shrq %rcx
7668; AVX1-NEXT:    movl %eax, %edx
7669; AVX1-NEXT:    andl $1, %edx
7670; AVX1-NEXT:    orq %rcx, %rdx
7671; AVX1-NEXT:    testq %rax, %rax
7672; AVX1-NEXT:    cmovnsq %rax, %rdx
7673; AVX1-NEXT:    vcvtsi2ss %rdx, %xmm2, %xmm2
7674; AVX1-NEXT:    jns .LBB184_4
7675; AVX1-NEXT:  # %bb.3:
7676; AVX1-NEXT:    vaddss %xmm2, %xmm2, %xmm2
7677; AVX1-NEXT:  .LBB184_4: # %entry
7678; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
7679; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
7680; AVX1-NEXT:    vmovq %xmm0, %rax
7681; AVX1-NEXT:    movq %rax, %rcx
7682; AVX1-NEXT:    shrq %rcx
7683; AVX1-NEXT:    movl %eax, %edx
7684; AVX1-NEXT:    andl $1, %edx
7685; AVX1-NEXT:    orq %rcx, %rdx
7686; AVX1-NEXT:    testq %rax, %rax
7687; AVX1-NEXT:    cmovnsq %rax, %rdx
7688; AVX1-NEXT:    vcvtsi2ss %rdx, %xmm3, %xmm0
7689; AVX1-NEXT:    jns .LBB184_6
7690; AVX1-NEXT:  # %bb.5:
7691; AVX1-NEXT:    vaddss %xmm0, %xmm0, %xmm0
7692; AVX1-NEXT:  .LBB184_6: # %entry
7693; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
7694; AVX1-NEXT:    vzeroupper
7695; AVX1-NEXT:    retq
7696;
7697; AVX512-LABEL: constrained_vector_uitofp_v3f32_v3i64:
7698; AVX512:       # %bb.0: # %entry
7699; AVX512-NEXT:    vpextrq $1, %xmm0, %rax
7700; AVX512-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
7701; AVX512-NEXT:    vmovq %xmm0, %rax
7702; AVX512-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm2
7703; AVX512-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
7704; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
7705; AVX512-NEXT:    vmovq %xmm0, %rax
7706; AVX512-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm0
7707; AVX512-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
7708; AVX512-NEXT:    vzeroupper
7709; AVX512-NEXT:    retq
7710entry:
7711  %result = call <3 x float>
7712           @llvm.experimental.constrained.uitofp.v3f32.v3i64(<3 x i64> %x,
7713                                               metadata !"round.dynamic",
7714                                               metadata !"fpexcept.strict") #0
7715  ret <3 x float> %result
7716}
7717
; Strict-FP codegen test: <4 x i32> -> <4 x double> via
; @llvm.experimental.constrained.uitofp. SSE/AVX1 use the 2^52 OR/subtract
; exponent-bias trick on zero-extended halves (AVX1 in a single ymm);
; AVX512 uses vcvtudq2pd. CHECK lines are autogenerated
; (update_llc_test_checks.py) -- regenerate rather than hand-edit.
7718define <4 x double> @constrained_vector_uitofp_v4f64_v4i32(<4 x i32> %x) #0 {
7719; CHECK-LABEL: constrained_vector_uitofp_v4f64_v4i32:
7720; CHECK:       # %bb.0: # %entry
7721; CHECK-NEXT:    xorpd %xmm2, %xmm2
7722; CHECK-NEXT:    movapd %xmm0, %xmm1
7723; CHECK-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
7724; CHECK-NEXT:    movapd {{.*#+}} xmm3 = [4.503599627370496E+15,4.503599627370496E+15]
7725; CHECK-NEXT:    orpd %xmm3, %xmm1
7726; CHECK-NEXT:    subpd %xmm3, %xmm1
7727; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
7728; CHECK-NEXT:    orpd %xmm3, %xmm0
7729; CHECK-NEXT:    subpd %xmm3, %xmm0
7730; CHECK-NEXT:    retq
7731;
7732; AVX1-LABEL: constrained_vector_uitofp_v4f64_v4i32:
7733; AVX1:       # %bb.0: # %entry
7734; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
7735; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7736; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7737; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
7738; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
7739; AVX1-NEXT:    vorpd %ymm1, %ymm0, %ymm0
7740; AVX1-NEXT:    vsubpd %ymm1, %ymm0, %ymm0
7741; AVX1-NEXT:    retq
7742;
7743; AVX512-LABEL: constrained_vector_uitofp_v4f64_v4i32:
7744; AVX512:       # %bb.0: # %entry
7745; AVX512-NEXT:    vmovaps %xmm0, %xmm0
7746; AVX512-NEXT:    vcvtudq2pd %ymm0, %zmm0
7747; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
7748; AVX512-NEXT:    retq
7749entry:
7750  %result = call <4 x double>
7751           @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32> %x,
7752                                               metadata !"round.dynamic",
7753                                               metadata !"fpexcept.strict") #0
7754  ret <4 x double> %result
7755}
7756
; Strict-FP uitofp <4 x i32> -> <4 x float>. SSE/AVX1 split each lane into
; low/high 16-bit halves, convert each half via a magic-constant bias, then
; recombine with subps+addps; AVX512 widens and uses a single vcvtudq2ps.
define <4 x float> @constrained_vector_uitofp_v4f32_v4i32(<4 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v4f32_v4i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
; CHECK-NEXT:    pand %xmm0, %xmm1
; CHECK-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NEXT:    psrld $16, %xmm0
; CHECK-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT:    addps %xmm1, %xmm0
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_uitofp_v4f32_v4i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; AVX1-NEXT:    vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: constrained_vector_uitofp_v4f32_v4i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovaps %xmm0, %xmm0
; AVX512-NEXT:    vcvtudq2ps %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %result = call <4 x float>
           @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}
7792
; Strict-FP uitofp <4 x i64> -> <4 x double>. SSE converts per element: values
; with the sign bit set are halved (shift + preserved low bit), converted with
; signed cvtsi2sd, then doubled. AVX512F uses per-element vcvtusi2sd; AVX512DQ
; widens to zmm and uses a single vcvtuqq2pd.
define <4 x double> @constrained_vector_uitofp_v4f64_v4i64(<4 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v4f64_v4i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm2
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    shrq %rcx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    andl $1, %edx
; CHECK-NEXT:    orq %rcx, %rdx
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovnsq %rax, %rdx
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    cvtsi2sd %rdx, %xmm0
; CHECK-NEXT:    jns .LBB187_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    addsd %xmm0, %xmm0
; CHECK-NEXT:  .LBB187_2: # %entry
; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; CHECK-NEXT:    movq %xmm2, %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    shrq %rcx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    andl $1, %edx
; CHECK-NEXT:    orq %rcx, %rdx
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovnsq %rax, %rdx
; CHECK-NEXT:    cvtsi2sd %rdx, %xmm3
; CHECK-NEXT:    jns .LBB187_4
; CHECK-NEXT:  # %bb.3:
; CHECK-NEXT:    addsd %xmm3, %xmm3
; CHECK-NEXT:  .LBB187_4: # %entry
; CHECK-NEXT:    movq %xmm1, %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    shrq %rcx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    andl $1, %edx
; CHECK-NEXT:    orq %rcx, %rdx
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovnsq %rax, %rdx
; CHECK-NEXT:    xorps %xmm2, %xmm2
; CHECK-NEXT:    cvtsi2sd %rdx, %xmm2
; CHECK-NEXT:    jns .LBB187_6
; CHECK-NEXT:  # %bb.5:
; CHECK-NEXT:    addsd %xmm2, %xmm2
; CHECK-NEXT:  .LBB187_6: # %entry
; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; CHECK-NEXT:    movq %xmm1, %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    shrq %rcx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    andl $1, %edx
; CHECK-NEXT:    orq %rcx, %rdx
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovnsq %rax, %rdx
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    cvtsi2sd %rdx, %xmm1
; CHECK-NEXT:    jns .LBB187_8
; CHECK-NEXT:  # %bb.7:
; CHECK-NEXT:    addsd %xmm1, %xmm1
; CHECK-NEXT:  .LBB187_8: # %entry
; CHECK-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; CHECK-NEXT:    movapd %xmm2, %xmm1
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_uitofp_v4f64_v4i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpextrd $2, %xmm1, %eax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
; AVX1-NEXT:    vmovd %xmm1, %eax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm3
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-NEXT:    vextractps $2, %xmm0, %eax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm3
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    movl %eax, %eax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm4
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vpextrd $3, %xmm1, %eax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm3
; AVX1-NEXT:    vpextrd $1, %xmm1, %eax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm1
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; AVX1-NEXT:    vpextrd $3, %xmm0, %eax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm3
; AVX1-NEXT:    vpextrd $1, %xmm0, %eax
; AVX1-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm0
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX512F-LABEL: constrained_vector_uitofp_v4f64_v4i64:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpextrq $1, %xmm1, %rax
; AVX512F-NEXT:    vcvtusi2sd %rax, %xmm2, %xmm2
; AVX512F-NEXT:    vmovq %xmm1, %rax
; AVX512F-NEXT:    vcvtusi2sd %rax, %xmm3, %xmm1
; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2sd %rax, %xmm3, %xmm2
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2sd %rax, %xmm3, %xmm0
; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512DQ-LABEL: constrained_vector_uitofp_v4f64_v4i64:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT:    vcvtuqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    retq
entry:
  %result = call <4 x double>
           @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}
7918
; Strict-FP uitofp <4 x i64> -> <4 x float>. SSE converts per element with the
; halve/convert/double trick for inputs with the sign bit set; AVX1 does the
; halving in vectors and blends in the doubled results; AVX512F uses
; per-element vcvtusi2ss; AVX512DQ uses a single vcvtuqq2ps.
define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v4f32_v4i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %xmm1, %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    shrq %rcx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    andl $1, %edx
; CHECK-NEXT:    orq %rcx, %rdx
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovnsq %rax, %rdx
; CHECK-NEXT:    cvtsi2ss %rdx, %xmm2
; CHECK-NEXT:    jns .LBB188_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    addss %xmm2, %xmm2
; CHECK-NEXT:  .LBB188_2: # %entry
; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; CHECK-NEXT:    movq %xmm1, %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    shrq %rcx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    andl $1, %edx
; CHECK-NEXT:    orq %rcx, %rdx
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovnsq %rax, %rdx
; CHECK-NEXT:    cvtsi2ss %rdx, %xmm3
; CHECK-NEXT:    jns .LBB188_4
; CHECK-NEXT:  # %bb.3:
; CHECK-NEXT:    addss %xmm3, %xmm3
; CHECK-NEXT:  .LBB188_4: # %entry
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    shrq %rcx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    andl $1, %edx
; CHECK-NEXT:    orq %rcx, %rdx
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovnsq %rax, %rdx
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    cvtsi2ss %rdx, %xmm1
; CHECK-NEXT:    jns .LBB188_6
; CHECK-NEXT:  # %bb.5:
; CHECK-NEXT:    addss %xmm1, %xmm1
; CHECK-NEXT:  .LBB188_6: # %entry
; CHECK-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT:    movq %xmm0, %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    shrq %rcx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    andl $1, %edx
; CHECK-NEXT:    orq %rcx, %rdx
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovnsq %rax, %rdx
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    cvtsi2ss %rdx, %xmm0
; CHECK-NEXT:    jns .LBB188_8
; CHECK-NEXT:  # %bb.7:
; CHECK-NEXT:    addss %xmm0, %xmm0
; CHECK-NEXT:  .LBB188_8: # %entry
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    retq
;
; AVX1-LABEL: constrained_vector_uitofp_v4f32_v4i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpsrlq $1, %xmm2, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3
; AVX1-NEXT:    vorpd %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm3
; AVX1-NEXT:    vmovq %xmm1, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm4, %xmm4
; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vmovq %xmm1, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm4
; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm1
; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
; AVX1-NEXT:    vaddps %xmm1, %xmm1, %xmm3
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vblendvps %xmm0, %xmm3, %xmm1, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX512F-LABEL: constrained_vector_uitofp_v4f32_v4i64:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm2
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm2
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm0
; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512DQ-LABEL: constrained_vector_uitofp_v4f32_v4i64:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT:    vcvtuqq2ps %zmm0, %ymm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
entry:
  %result = call <4 x float>
           @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64> %x,
                                               metadata !"round.dynamic",
                                               metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}
8042
; Simple test to make sure we don't fuse vselect+strict_fadd into a masked operation.
; The strictfp fadd must stay a separate full-width add; the mask-driven select
; is applied afterwards (e.g. AVX512 emits vaddps then a masked vmovaps).
define <16 x float> @vpaddd_mask_test(<16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone strictfp {
; CHECK-LABEL: vpaddd_mask_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pxor %xmm10, %xmm10
; CHECK-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
; CHECK-NEXT:    pcmpeqd %xmm10, %xmm8
; CHECK-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
; CHECK-NEXT:    pcmpeqd %xmm10, %xmm9
; CHECK-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
; CHECK-NEXT:    pcmpeqd %xmm10, %xmm11
; CHECK-NEXT:    pcmpeqd {{[0-9]+}}(%rsp), %xmm10
; CHECK-NEXT:    addps %xmm3, %xmm7
; CHECK-NEXT:    addps %xmm2, %xmm6
; CHECK-NEXT:    addps %xmm1, %xmm5
; CHECK-NEXT:    addps %xmm0, %xmm4
; CHECK-NEXT:    andps %xmm10, %xmm0
; CHECK-NEXT:    andnps %xmm4, %xmm10
; CHECK-NEXT:    orps %xmm10, %xmm0
; CHECK-NEXT:    andps %xmm11, %xmm1
; CHECK-NEXT:    andnps %xmm5, %xmm11
; CHECK-NEXT:    orps %xmm11, %xmm1
; CHECK-NEXT:    andps %xmm9, %xmm2
; CHECK-NEXT:    andnps %xmm6, %xmm9
; CHECK-NEXT:    orps %xmm9, %xmm2
; CHECK-NEXT:    andps %xmm8, %xmm3
; CHECK-NEXT:    andnps %xmm7, %xmm8
; CHECK-NEXT:    orps %xmm8, %xmm3
; CHECK-NEXT:    retq
;
; AVX1-LABEL: vpaddd_mask_test:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm6
; AVX1-NEXT:    vpxor %xmm7, %xmm7, %xmm7
; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm6, %xmm6
; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm5, %xmm5
; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm6
; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm6, %xmm6
; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm4, %ymm4
; AVX1-NEXT:    vaddps %ymm3, %ymm1, %ymm3
; AVX1-NEXT:    vaddps %ymm2, %ymm0, %ymm2
; AVX1-NEXT:    vblendvps %ymm4, %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vblendvps %ymm5, %ymm1, %ymm3, %ymm1
; AVX1-NEXT:    retq
;
; AVX512-LABEL: vpaddd_mask_test:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vptestmd %zmm2, %zmm2, %k1
; AVX512-NEXT:    vaddps %zmm1, %zmm0, %zmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0 {%k1}
; AVX512-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %i, <16 x float> %j, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %i
  ret <16 x float> %r
}
8101
; Strict-FP tan on <1 x float>: scalarized to a single tanf libcall.
define <1 x float> @constrained_vector_tan_v1f32() #0 {
; CHECK-LABEL: constrained_vector_tan_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_tan_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq tanf@PLT
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %tan = call <1 x float> @llvm.experimental.constrained.tan.v1f32(
                             <1 x float> <float 42.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <1 x float> %tan
}
8129
; Strict-FP tan on <2 x double>: scalarized to two tan libcalls, with the
; first result spilled and the lanes recombined via unpcklpd.
define <2 x double> @constrained_vector_tan_v2f64() #0 {
; CHECK-LABEL: constrained_vector_tan_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    callq tan@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    callq tan@PLT
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_tan_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    callq tan@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    callq tan@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %tan = call <2 x double> @llvm.experimental.constrained.tan.v2f64(
                             <2 x double> <double 42.0, double 42.1>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <2 x double> %tan
}
8167
; Strict-FP tan on <3 x float>: scalarized to three tanf libcalls with stack
; spills between calls; results reassembled with unpcklps/vinsertps.
define <3 x float> @constrained_vector_tan_v3f32() #0 {
; CHECK-LABEL: constrained_vector_tan_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_tan_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq tanf@PLT
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq tanf@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq tanf@PLT
; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %tan = call <3 x float> @llvm.experimental.constrained.tan.v3f32(
                              <3 x float> <float 42.0, float 43.0, float 44.0>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %tan
}
8216
; Strict-FP tan on <3 x double>: three tan libcalls. SSE returns the third
; element on the x87 stack (fldl + wait); AVX builds a ymm with vinsertf128.
define <3 x double> @constrained_vector_tan_v3f64() #0 {
; CHECK-LABEL: constrained_vector_tan_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    callq tan@PLT
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    callq tan@PLT
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT:    callq tan@PLT
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_tan_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    callq tan@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    callq tan@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq tan@PLT
; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %tan = call <3 x double> @llvm.experimental.constrained.tan.v3f64(
                          <3 x double> <double 42.0, double 42.1, double 42.2>,
                          metadata !"round.dynamic",
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %tan
}
8268
; Strict-FP tan on <4 x double>: four tan libcalls; SSE returns two xmm pairs,
; AVX reassembles a single ymm via vunpcklpd + vinsertf128.
define <4 x double> @constrained_vector_tan_v4f64() #0 {
; CHECK-LABEL: constrained_vector_tan_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    callq tan@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    callq tan@PLT
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
; CHECK-NEXT:    callq tan@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT:    callq tan@PLT
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_tan_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
; AVX-NEXT:    callq tan@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; AVX-NEXT:    callq tan@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    callq tan@PLT
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    callq tan@PLT
; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %tan = call <4 x double> @llvm.experimental.constrained.tan.v4f64(
                             <4 x double> <double 42.0, double 42.1,
                                           double 42.2, double 42.3>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <4 x double> %tan
}
8326
; Strict-FP acos on <1 x float>: scalarized to a single acosf libcall.
define <1 x float> @constrained_vector_acos_v1f32() #0 {
; CHECK-LABEL: constrained_vector_acos_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_acos_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq acosf@PLT
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %acos = call <1 x float> @llvm.experimental.constrained.acos.v1f32(
                             <1 x float> <float 42.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <1 x float> %acos
}
8354
; Strict-FP acos on <2 x double>: scalarized to two acos libcalls, with the
; first result spilled and the lanes recombined via unpcklpd.
define <2 x double> @constrained_vector_acos_v2f64() #0 {
; CHECK-LABEL: constrained_vector_acos_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    callq acos@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    callq acos@PLT
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_acos_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    callq acos@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    callq acos@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %acos = call <2 x double> @llvm.experimental.constrained.acos.v2f64(
                             <2 x double> <double 42.0, double 42.1>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <2 x double> %acos
}
8392
; Strict-FP acos on <3 x float>: scalarized to three acosf libcalls with stack
; spills between calls; results reassembled with unpcklps/vinsertps.
define <3 x float> @constrained_vector_acos_v3f32() #0 {
; CHECK-LABEL: constrained_vector_acos_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_acos_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq acosf@PLT
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq acosf@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq acosf@PLT
; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %acos = call <3 x float> @llvm.experimental.constrained.acos.v3f32(
                              <3 x float> <float 42.0, float 43.0, float 44.0>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %acos
}
8441
; Strict-FP acos on <3 x double>: scalarized into three acos@PLT libcalls (CHECK lines autogenerated; regenerate, do not hand-edit).
8442define <3 x double> @constrained_vector_acos_v3f64() #0 {
8443; CHECK-LABEL: constrained_vector_acos_v3f64:
8444; CHECK:       # %bb.0: # %entry
8445; CHECK-NEXT:    subq $24, %rsp
8446; CHECK-NEXT:    .cfi_def_cfa_offset 32
8447; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
8448; CHECK-NEXT:    callq acos@PLT
8449; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
8450; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
8451; CHECK-NEXT:    callq acos@PLT
8452; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
8453; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
8454; CHECK-NEXT:    callq acos@PLT
8455; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
8456; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
8457; CHECK-NEXT:    wait
8458; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
8459; CHECK-NEXT:    # xmm0 = mem[0],zero
8460; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
8461; CHECK-NEXT:    # xmm1 = mem[0],zero
8462; CHECK-NEXT:    addq $24, %rsp
8463; CHECK-NEXT:    .cfi_def_cfa_offset 8
8464; CHECK-NEXT:    retq
8465;
8466; AVX-LABEL: constrained_vector_acos_v3f64:
8467; AVX:       # %bb.0: # %entry
8468; AVX-NEXT:    subq $40, %rsp
8469; AVX-NEXT:    .cfi_def_cfa_offset 48
8470; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
8471; AVX-NEXT:    callq acos@PLT
8472; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
8473; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
8474; AVX-NEXT:    callq acos@PLT
8475; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
8476; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
8477; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
8478; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
8479; AVX-NEXT:    vzeroupper
8480; AVX-NEXT:    callq acos@PLT
8481; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
8482; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
8483; AVX-NEXT:    addq $40, %rsp
8484; AVX-NEXT:    .cfi_def_cfa_offset 8
8485; AVX-NEXT:    retq
8486entry:
8487  %acos = call <3 x double> @llvm.experimental.constrained.acos.v3f64(
8488                          <3 x double> <double 42.0, double 42.1, double 42.2>,
8489                          metadata !"round.dynamic",
8490                          metadata !"fpexcept.strict") #0
8491  ret <3 x double> %acos
8492}
8493
; Strict-FP acos on <4 x double>: four acos@PLT libcalls; AVX rebuilds the ymm result via vinsertf128 (CHECK lines autogenerated; do not hand-edit).
8494define <4 x double> @constrained_vector_acos_v4f64() #0 {
8495; CHECK-LABEL: constrained_vector_acos_v4f64:
8496; CHECK:       # %bb.0: # %entry
8497; CHECK-NEXT:    subq $40, %rsp
8498; CHECK-NEXT:    .cfi_def_cfa_offset 48
8499; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
8500; CHECK-NEXT:    callq acos@PLT
8501; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
8502; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
8503; CHECK-NEXT:    callq acos@PLT
8504; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
8505; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
8506; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
8507; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
8508; CHECK-NEXT:    callq acos@PLT
8509; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
8510; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
8511; CHECK-NEXT:    callq acos@PLT
8512; CHECK-NEXT:    movaps %xmm0, %xmm1
8513; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
8514; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
8515; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
8516; CHECK-NEXT:    addq $40, %rsp
8517; CHECK-NEXT:    .cfi_def_cfa_offset 8
8518; CHECK-NEXT:    retq
8519;
8520; AVX-LABEL: constrained_vector_acos_v4f64:
8521; AVX:       # %bb.0: # %entry
8522; AVX-NEXT:    subq $40, %rsp
8523; AVX-NEXT:    .cfi_def_cfa_offset 48
8524; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
8525; AVX-NEXT:    callq acos@PLT
8526; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
8527; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
8528; AVX-NEXT:    callq acos@PLT
8529; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
8530; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
8531; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
8532; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
8533; AVX-NEXT:    callq acos@PLT
8534; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
8535; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
8536; AVX-NEXT:    callq acos@PLT
8537; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
8538; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
8539; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
8540; AVX-NEXT:    addq $40, %rsp
8541; AVX-NEXT:    .cfi_def_cfa_offset 8
8542; AVX-NEXT:    retq
8543entry:
8544  %acos = call <4 x double> @llvm.experimental.constrained.acos.v4f64(
8545                             <4 x double> <double 42.0, double 42.1,
8546                                           double 42.2, double 42.3>,
8547                             metadata !"round.dynamic",
8548                             metadata !"fpexcept.strict") #0
8549  ret <4 x double> %acos
8550}
8551
; Strict-FP asin on <1 x float>: lowers to a single asinf@PLT libcall (CHECK lines autogenerated; do not hand-edit).
8552define <1 x float> @constrained_vector_asin_v1f32() #0 {
8553; CHECK-LABEL: constrained_vector_asin_v1f32:
8554; CHECK:       # %bb.0: # %entry
8555; CHECK-NEXT:    pushq %rax
8556; CHECK-NEXT:    .cfi_def_cfa_offset 16
8557; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
8558; CHECK-NEXT:    callq asinf@PLT
8559; CHECK-NEXT:    popq %rax
8560; CHECK-NEXT:    .cfi_def_cfa_offset 8
8561; CHECK-NEXT:    retq
8562;
8563; AVX-LABEL: constrained_vector_asin_v1f32:
8564; AVX:       # %bb.0: # %entry
8565; AVX-NEXT:    pushq %rax
8566; AVX-NEXT:    .cfi_def_cfa_offset 16
8567; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
8568; AVX-NEXT:    callq asinf@PLT
8569; AVX-NEXT:    popq %rax
8570; AVX-NEXT:    .cfi_def_cfa_offset 8
8571; AVX-NEXT:    retq
8572entry:
8573  %asin = call <1 x float> @llvm.experimental.constrained.asin.v1f32(
8574                             <1 x float> <float 42.0>,
8575                             metadata !"round.dynamic",
8576                             metadata !"fpexcept.strict") #0
8577  ret <1 x float> %asin
8578}
8579
; Strict-FP asin on <2 x double>: two asin@PLT libcalls joined with unpcklpd (CHECK lines autogenerated; do not hand-edit).
8580define <2 x double> @constrained_vector_asin_v2f64() #0 {
8581; CHECK-LABEL: constrained_vector_asin_v2f64:
8582; CHECK:       # %bb.0: # %entry
8583; CHECK-NEXT:    subq $24, %rsp
8584; CHECK-NEXT:    .cfi_def_cfa_offset 32
8585; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
8586; CHECK-NEXT:    callq asin@PLT
8587; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
8588; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
8589; CHECK-NEXT:    callq asin@PLT
8590; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
8591; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
8592; CHECK-NEXT:    addq $24, %rsp
8593; CHECK-NEXT:    .cfi_def_cfa_offset 8
8594; CHECK-NEXT:    retq
8595;
8596; AVX-LABEL: constrained_vector_asin_v2f64:
8597; AVX:       # %bb.0: # %entry
8598; AVX-NEXT:    subq $24, %rsp
8599; AVX-NEXT:    .cfi_def_cfa_offset 32
8600; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
8601; AVX-NEXT:    callq asin@PLT
8602; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
8603; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
8604; AVX-NEXT:    callq asin@PLT
8605; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
8606; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
8607; AVX-NEXT:    addq $24, %rsp
8608; AVX-NEXT:    .cfi_def_cfa_offset 8
8609; AVX-NEXT:    retq
8610entry:
8611  %asin = call <2 x double> @llvm.experimental.constrained.asin.v2f64(
8612                             <2 x double> <double 42.0, double 42.1>,
8613                             metadata !"round.dynamic",
8614                             metadata !"fpexcept.strict") #0
8615  ret <2 x double> %asin
8616}
8617
; Strict-FP asin on <3 x float>: three asinf@PLT libcalls, result lanes reassembled with unpcklps/vinsertps (CHECK lines autogenerated; do not hand-edit).
8618define <3 x float> @constrained_vector_asin_v3f32() #0 {
8619; CHECK-LABEL: constrained_vector_asin_v3f32:
8620; CHECK:       # %bb.0: # %entry
8621; CHECK-NEXT:    subq $40, %rsp
8622; CHECK-NEXT:    .cfi_def_cfa_offset 48
8623; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
8624; CHECK-NEXT:    callq asinf@PLT
8625; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
8626; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
8627; CHECK-NEXT:    callq asinf@PLT
8628; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
8629; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
8630; CHECK-NEXT:    callq asinf@PLT
8631; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
8632; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8633; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
8634; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
8635; CHECK-NEXT:    movaps %xmm1, %xmm0
8636; CHECK-NEXT:    addq $40, %rsp
8637; CHECK-NEXT:    .cfi_def_cfa_offset 8
8638; CHECK-NEXT:    retq
8639;
8640; AVX-LABEL: constrained_vector_asin_v3f32:
8641; AVX:       # %bb.0: # %entry
8642; AVX-NEXT:    subq $40, %rsp
8643; AVX-NEXT:    .cfi_def_cfa_offset 48
8644; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
8645; AVX-NEXT:    callq asinf@PLT
8646; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
8647; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
8648; AVX-NEXT:    callq asinf@PLT
8649; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
8650; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
8651; AVX-NEXT:    callq asinf@PLT
8652; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
8653; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
8654; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
8655; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
8656; AVX-NEXT:    addq $40, %rsp
8657; AVX-NEXT:    .cfi_def_cfa_offset 8
8658; AVX-NEXT:    retq
8659entry:
8660  %asin = call <3 x float> @llvm.experimental.constrained.asin.v3f32(
8661                              <3 x float> <float 42.0, float 43.0, float 44.0>,
8662                              metadata !"round.dynamic",
8663                              metadata !"fpexcept.strict") #0
8664  ret <3 x float> %asin
8665}
8666
; Strict-FP asin on <3 x double>: three asin@PLT libcalls; SSE returns the third lane on the x87 stack via fldl (CHECK lines autogenerated; do not hand-edit).
8667define <3 x double> @constrained_vector_asin_v3f64() #0 {
8668; CHECK-LABEL: constrained_vector_asin_v3f64:
8669; CHECK:       # %bb.0: # %entry
8670; CHECK-NEXT:    subq $24, %rsp
8671; CHECK-NEXT:    .cfi_def_cfa_offset 32
8672; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
8673; CHECK-NEXT:    callq asin@PLT
8674; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
8675; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
8676; CHECK-NEXT:    callq asin@PLT
8677; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
8678; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
8679; CHECK-NEXT:    callq asin@PLT
8680; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
8681; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
8682; CHECK-NEXT:    wait
8683; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
8684; CHECK-NEXT:    # xmm0 = mem[0],zero
8685; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
8686; CHECK-NEXT:    # xmm1 = mem[0],zero
8687; CHECK-NEXT:    addq $24, %rsp
8688; CHECK-NEXT:    .cfi_def_cfa_offset 8
8689; CHECK-NEXT:    retq
8690;
8691; AVX-LABEL: constrained_vector_asin_v3f64:
8692; AVX:       # %bb.0: # %entry
8693; AVX-NEXT:    subq $40, %rsp
8694; AVX-NEXT:    .cfi_def_cfa_offset 48
8695; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
8696; AVX-NEXT:    callq asin@PLT
8697; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
8698; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
8699; AVX-NEXT:    callq asin@PLT
8700; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
8701; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
8702; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
8703; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
8704; AVX-NEXT:    vzeroupper
8705; AVX-NEXT:    callq asin@PLT
8706; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
8707; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
8708; AVX-NEXT:    addq $40, %rsp
8709; AVX-NEXT:    .cfi_def_cfa_offset 8
8710; AVX-NEXT:    retq
8711entry:
8712  %asin = call <3 x double> @llvm.experimental.constrained.asin.v3f64(
8713                          <3 x double> <double 42.0, double 42.1, double 42.2>,
8714                          metadata !"round.dynamic",
8715                          metadata !"fpexcept.strict") #0
8716  ret <3 x double> %asin
8717}
8718
; Strict-FP asin on <4 x double>: four asin@PLT libcalls; AVX rebuilds the ymm result via vinsertf128 (CHECK lines autogenerated; do not hand-edit).
8719define <4 x double> @constrained_vector_asin_v4f64() #0 {
8720; CHECK-LABEL: constrained_vector_asin_v4f64:
8721; CHECK:       # %bb.0: # %entry
8722; CHECK-NEXT:    subq $40, %rsp
8723; CHECK-NEXT:    .cfi_def_cfa_offset 48
8724; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
8725; CHECK-NEXT:    callq asin@PLT
8726; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
8727; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
8728; CHECK-NEXT:    callq asin@PLT
8729; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
8730; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
8731; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
8732; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
8733; CHECK-NEXT:    callq asin@PLT
8734; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
8735; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
8736; CHECK-NEXT:    callq asin@PLT
8737; CHECK-NEXT:    movaps %xmm0, %xmm1
8738; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
8739; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
8740; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
8741; CHECK-NEXT:    addq $40, %rsp
8742; CHECK-NEXT:    .cfi_def_cfa_offset 8
8743; CHECK-NEXT:    retq
8744;
8745; AVX-LABEL: constrained_vector_asin_v4f64:
8746; AVX:       # %bb.0: # %entry
8747; AVX-NEXT:    subq $40, %rsp
8748; AVX-NEXT:    .cfi_def_cfa_offset 48
8749; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
8750; AVX-NEXT:    callq asin@PLT
8751; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
8752; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
8753; AVX-NEXT:    callq asin@PLT
8754; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
8755; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
8756; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
8757; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
8758; AVX-NEXT:    callq asin@PLT
8759; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
8760; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
8761; AVX-NEXT:    callq asin@PLT
8762; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
8763; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
8764; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
8765; AVX-NEXT:    addq $40, %rsp
8766; AVX-NEXT:    .cfi_def_cfa_offset 8
8767; AVX-NEXT:    retq
8768entry:
8769  %asin = call <4 x double> @llvm.experimental.constrained.asin.v4f64(
8770                             <4 x double> <double 42.0, double 42.1,
8771                                           double 42.2, double 42.3>,
8772                             metadata !"round.dynamic",
8773                             metadata !"fpexcept.strict") #0
8774  ret <4 x double> %asin
8775}
8776
; Strict-FP atan on <1 x float>: lowers to a single atanf@PLT libcall (CHECK lines autogenerated; do not hand-edit).
8777define <1 x float> @constrained_vector_atan_v1f32() #0 {
8778; CHECK-LABEL: constrained_vector_atan_v1f32:
8779; CHECK:       # %bb.0: # %entry
8780; CHECK-NEXT:    pushq %rax
8781; CHECK-NEXT:    .cfi_def_cfa_offset 16
8782; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
8783; CHECK-NEXT:    callq atanf@PLT
8784; CHECK-NEXT:    popq %rax
8785; CHECK-NEXT:    .cfi_def_cfa_offset 8
8786; CHECK-NEXT:    retq
8787;
8788; AVX-LABEL: constrained_vector_atan_v1f32:
8789; AVX:       # %bb.0: # %entry
8790; AVX-NEXT:    pushq %rax
8791; AVX-NEXT:    .cfi_def_cfa_offset 16
8792; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
8793; AVX-NEXT:    callq atanf@PLT
8794; AVX-NEXT:    popq %rax
8795; AVX-NEXT:    .cfi_def_cfa_offset 8
8796; AVX-NEXT:    retq
8797entry:
8798  %atan = call <1 x float> @llvm.experimental.constrained.atan.v1f32(
8799                             <1 x float> <float 42.0>,
8800                             metadata !"round.dynamic",
8801                             metadata !"fpexcept.strict") #0
8802  ret <1 x float> %atan
8803}
8804
; Strict-FP atan on <2 x double>: two atan@PLT libcalls joined with unpcklpd (CHECK lines autogenerated; do not hand-edit).
8805define <2 x double> @constrained_vector_atan_v2f64() #0 {
8806; CHECK-LABEL: constrained_vector_atan_v2f64:
8807; CHECK:       # %bb.0: # %entry
8808; CHECK-NEXT:    subq $24, %rsp
8809; CHECK-NEXT:    .cfi_def_cfa_offset 32
8810; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
8811; CHECK-NEXT:    callq atan@PLT
8812; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
8813; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
8814; CHECK-NEXT:    callq atan@PLT
8815; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
8816; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
8817; CHECK-NEXT:    addq $24, %rsp
8818; CHECK-NEXT:    .cfi_def_cfa_offset 8
8819; CHECK-NEXT:    retq
8820;
8821; AVX-LABEL: constrained_vector_atan_v2f64:
8822; AVX:       # %bb.0: # %entry
8823; AVX-NEXT:    subq $24, %rsp
8824; AVX-NEXT:    .cfi_def_cfa_offset 32
8825; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
8826; AVX-NEXT:    callq atan@PLT
8827; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
8828; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
8829; AVX-NEXT:    callq atan@PLT
8830; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
8831; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
8832; AVX-NEXT:    addq $24, %rsp
8833; AVX-NEXT:    .cfi_def_cfa_offset 8
8834; AVX-NEXT:    retq
8835entry:
8836  %atan = call <2 x double> @llvm.experimental.constrained.atan.v2f64(
8837                             <2 x double> <double 42.0, double 42.1>,
8838                             metadata !"round.dynamic",
8839                             metadata !"fpexcept.strict") #0
8840  ret <2 x double> %atan
8841}
8842
; Strict-FP atan on <3 x float>: three atanf@PLT libcalls, lanes reassembled with unpcklps/vinsertps (CHECK lines autogenerated; do not hand-edit).
8843define <3 x float> @constrained_vector_atan_v3f32() #0 {
8844; CHECK-LABEL: constrained_vector_atan_v3f32:
8845; CHECK:       # %bb.0: # %entry
8846; CHECK-NEXT:    subq $40, %rsp
8847; CHECK-NEXT:    .cfi_def_cfa_offset 48
8848; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
8849; CHECK-NEXT:    callq atanf@PLT
8850; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
8851; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
8852; CHECK-NEXT:    callq atanf@PLT
8853; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
8854; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
8855; CHECK-NEXT:    callq atanf@PLT
8856; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
8857; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8858; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
8859; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
8860; CHECK-NEXT:    movaps %xmm1, %xmm0
8861; CHECK-NEXT:    addq $40, %rsp
8862; CHECK-NEXT:    .cfi_def_cfa_offset 8
8863; CHECK-NEXT:    retq
8864;
8865; AVX-LABEL: constrained_vector_atan_v3f32:
8866; AVX:       # %bb.0: # %entry
8867; AVX-NEXT:    subq $40, %rsp
8868; AVX-NEXT:    .cfi_def_cfa_offset 48
8869; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
8870; AVX-NEXT:    callq atanf@PLT
8871; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
8872; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
8873; AVX-NEXT:    callq atanf@PLT
8874; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
8875; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
8876; AVX-NEXT:    callq atanf@PLT
8877; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
8878; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
8879; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
8880; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
8881; AVX-NEXT:    addq $40, %rsp
8882; AVX-NEXT:    .cfi_def_cfa_offset 8
8883; AVX-NEXT:    retq
8884entry:
8885  %atan = call <3 x float> @llvm.experimental.constrained.atan.v3f32(
8886                              <3 x float> <float 42.0, float 43.0, float 44.0>,
8887                              metadata !"round.dynamic",
8888                              metadata !"fpexcept.strict") #0
8889  ret <3 x float> %atan
8890}
8891
; Strict-FP atan on <3 x double>: three atan@PLT libcalls; SSE returns the third lane on the x87 stack via fldl (CHECK lines autogenerated; do not hand-edit).
8892define <3 x double> @constrained_vector_atan_v3f64() #0 {
8893; CHECK-LABEL: constrained_vector_atan_v3f64:
8894; CHECK:       # %bb.0: # %entry
8895; CHECK-NEXT:    subq $24, %rsp
8896; CHECK-NEXT:    .cfi_def_cfa_offset 32
8897; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
8898; CHECK-NEXT:    callq atan@PLT
8899; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
8900; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
8901; CHECK-NEXT:    callq atan@PLT
8902; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
8903; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
8904; CHECK-NEXT:    callq atan@PLT
8905; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
8906; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
8907; CHECK-NEXT:    wait
8908; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
8909; CHECK-NEXT:    # xmm0 = mem[0],zero
8910; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
8911; CHECK-NEXT:    # xmm1 = mem[0],zero
8912; CHECK-NEXT:    addq $24, %rsp
8913; CHECK-NEXT:    .cfi_def_cfa_offset 8
8914; CHECK-NEXT:    retq
8915;
8916; AVX-LABEL: constrained_vector_atan_v3f64:
8917; AVX:       # %bb.0: # %entry
8918; AVX-NEXT:    subq $40, %rsp
8919; AVX-NEXT:    .cfi_def_cfa_offset 48
8920; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
8921; AVX-NEXT:    callq atan@PLT
8922; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
8923; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
8924; AVX-NEXT:    callq atan@PLT
8925; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
8926; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
8927; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
8928; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
8929; AVX-NEXT:    vzeroupper
8930; AVX-NEXT:    callq atan@PLT
8931; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
8932; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
8933; AVX-NEXT:    addq $40, %rsp
8934; AVX-NEXT:    .cfi_def_cfa_offset 8
8935; AVX-NEXT:    retq
8936entry:
8937  %atan = call <3 x double> @llvm.experimental.constrained.atan.v3f64(
8938                          <3 x double> <double 42.0, double 42.1, double 42.2>,
8939                          metadata !"round.dynamic",
8940                          metadata !"fpexcept.strict") #0
8941  ret <3 x double> %atan
8942}
8943
; Strict-FP atan on <4 x double>: four atan@PLT libcalls; AVX rebuilds the ymm result via vinsertf128 (CHECK lines autogenerated; do not hand-edit).
8944define <4 x double> @constrained_vector_atan_v4f64() #0 {
8945; CHECK-LABEL: constrained_vector_atan_v4f64:
8946; CHECK:       # %bb.0: # %entry
8947; CHECK-NEXT:    subq $40, %rsp
8948; CHECK-NEXT:    .cfi_def_cfa_offset 48
8949; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
8950; CHECK-NEXT:    callq atan@PLT
8951; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
8952; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
8953; CHECK-NEXT:    callq atan@PLT
8954; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
8955; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
8956; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
8957; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
8958; CHECK-NEXT:    callq atan@PLT
8959; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
8960; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
8961; CHECK-NEXT:    callq atan@PLT
8962; CHECK-NEXT:    movaps %xmm0, %xmm1
8963; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
8964; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
8965; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
8966; CHECK-NEXT:    addq $40, %rsp
8967; CHECK-NEXT:    .cfi_def_cfa_offset 8
8968; CHECK-NEXT:    retq
8969;
8970; AVX-LABEL: constrained_vector_atan_v4f64:
8971; AVX:       # %bb.0: # %entry
8972; AVX-NEXT:    subq $40, %rsp
8973; AVX-NEXT:    .cfi_def_cfa_offset 48
8974; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
8975; AVX-NEXT:    callq atan@PLT
8976; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
8977; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
8978; AVX-NEXT:    callq atan@PLT
8979; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
8980; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
8981; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
8982; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
8983; AVX-NEXT:    callq atan@PLT
8984; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
8985; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
8986; AVX-NEXT:    callq atan@PLT
8987; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
8988; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
8989; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
8990; AVX-NEXT:    addq $40, %rsp
8991; AVX-NEXT:    .cfi_def_cfa_offset 8
8992; AVX-NEXT:    retq
8993entry:
8994  %atan = call <4 x double> @llvm.experimental.constrained.atan.v4f64(
8995                             <4 x double> <double 42.0, double 42.1,
8996                                           double 42.2, double 42.3>,
8997                             metadata !"round.dynamic",
8998                             metadata !"fpexcept.strict") #0
8999  ret <4 x double> %atan
9000}
9001
; Strict-FP two-operand atan2 on <1 x float>: lowers to a single atan2f@PLT libcall with args in xmm0/xmm1 (CHECK lines autogenerated; do not hand-edit).
9002define <1 x float> @constrained_vector_atan2_v1f32() #0 {
9003; CHECK-LABEL: constrained_vector_atan2_v1f32:
9004; CHECK:       # %bb.0: # %entry
9005; CHECK-NEXT:    pushq %rax
9006; CHECK-NEXT:    .cfi_def_cfa_offset 16
9007; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
9008; CHECK-NEXT:    movss {{.*#+}} xmm1 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0]
9009; CHECK-NEXT:    callq atan2f@PLT
9010; CHECK-NEXT:    popq %rax
9011; CHECK-NEXT:    .cfi_def_cfa_offset 8
9012; CHECK-NEXT:    retq
9013;
9014; AVX-LABEL: constrained_vector_atan2_v1f32:
9015; AVX:       # %bb.0: # %entry
9016; AVX-NEXT:    pushq %rax
9017; AVX-NEXT:    .cfi_def_cfa_offset 16
9018; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
9019; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0]
9020; AVX-NEXT:    callq atan2f@PLT
9021; AVX-NEXT:    popq %rax
9022; AVX-NEXT:    .cfi_def_cfa_offset 8
9023; AVX-NEXT:    retq
9024entry:
9025  %atan2 = call <1 x float> @llvm.experimental.constrained.atan2.v1f32(
9026                             <1 x float> <float 42.0>,
9027                             <1 x float> <float 23.0>,
9028                             metadata !"round.dynamic",
9029                             metadata !"fpexcept.strict") #0
9030  ret <1 x float> %atan2
9031}
9032
; Strict-FP atan2 on <2 x double>: two atan2@PLT libcalls (y in xmm0, x in xmm1 per lane) joined with unpcklpd (CHECK lines autogenerated; do not hand-edit).
9033define <2 x double> @constrained_vector_atan2_v2f64() #0 {
9034; CHECK-LABEL: constrained_vector_atan2_v2f64:
9035; CHECK:       # %bb.0: # %entry
9036; CHECK-NEXT:    subq $24, %rsp
9037; CHECK-NEXT:    .cfi_def_cfa_offset 32
9038; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
9039; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0]
9040; CHECK-NEXT:    callq atan2@PLT
9041; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
9042; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
9043; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0]
9044; CHECK-NEXT:    callq atan2@PLT
9045; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
9046; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
9047; CHECK-NEXT:    addq $24, %rsp
9048; CHECK-NEXT:    .cfi_def_cfa_offset 8
9049; CHECK-NEXT:    retq
9050;
9051; AVX-LABEL: constrained_vector_atan2_v2f64:
9052; AVX:       # %bb.0: # %entry
9053; AVX-NEXT:    subq $24, %rsp
9054; AVX-NEXT:    .cfi_def_cfa_offset 32
9055; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
9056; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0]
9057; AVX-NEXT:    callq atan2@PLT
9058; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
9059; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
9060; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0]
9061; AVX-NEXT:    callq atan2@PLT
9062; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
9063; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
9064; AVX-NEXT:    addq $24, %rsp
9065; AVX-NEXT:    .cfi_def_cfa_offset 8
9066; AVX-NEXT:    retq
9067entry:
9068  %atan2 = call <2 x double> @llvm.experimental.constrained.atan2.v2f64(
9069                             <2 x double> <double 42.0, double 42.1>,
9070                             <2 x double> <double 23.0, double 23.1>,
9071                             metadata !"round.dynamic",
9072                             metadata !"fpexcept.strict") #0
9073  ret <2 x double> %atan2
9074}
9075
; Strict-FP atan2 on <3 x float>: three atan2f@PLT libcalls, lanes reassembled with unpcklps/vinsertps (CHECK lines autogenerated; do not hand-edit).
9076define <3 x float> @constrained_vector_atan2_v3f32() #0 {
9077; CHECK-LABEL: constrained_vector_atan2_v3f32:
9078; CHECK:       # %bb.0: # %entry
9079; CHECK-NEXT:    subq $40, %rsp
9080; CHECK-NEXT:    .cfi_def_cfa_offset 48
9081; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
9082; CHECK-NEXT:    movss {{.*#+}} xmm1 = [2.5E+1,0.0E+0,0.0E+0,0.0E+0]
9083; CHECK-NEXT:    callq atan2f@PLT
9084; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
9085; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
9086; CHECK-NEXT:    movss {{.*#+}} xmm1 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0]
9087; CHECK-NEXT:    callq atan2f@PLT
9088; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
9089; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
9090; CHECK-NEXT:    movss {{.*#+}} xmm1 = [2.4E+1,0.0E+0,0.0E+0,0.0E+0]
9091; CHECK-NEXT:    callq atan2f@PLT
9092; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
9093; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
9094; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
9095; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
9096; CHECK-NEXT:    movaps %xmm1, %xmm0
9097; CHECK-NEXT:    addq $40, %rsp
9098; CHECK-NEXT:    .cfi_def_cfa_offset 8
9099; CHECK-NEXT:    retq
9100;
9101; AVX-LABEL: constrained_vector_atan2_v3f32:
9102; AVX:       # %bb.0: # %entry
9103; AVX-NEXT:    subq $40, %rsp
9104; AVX-NEXT:    .cfi_def_cfa_offset 48
9105; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
9106; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [2.5E+1,0.0E+0,0.0E+0,0.0E+0]
9107; AVX-NEXT:    callq atan2f@PLT
9108; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
9109; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
9110; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0]
9111; AVX-NEXT:    callq atan2f@PLT
9112; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
9113; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
9114; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [2.4E+1,0.0E+0,0.0E+0,0.0E+0]
9115; AVX-NEXT:    callq atan2f@PLT
9116; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
9117; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
9118; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
9119; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
9120; AVX-NEXT:    addq $40, %rsp
9121; AVX-NEXT:    .cfi_def_cfa_offset 8
9122; AVX-NEXT:    retq
9123entry:
9124  %atan2 = call <3 x float> @llvm.experimental.constrained.atan2.v3f32(
9125                              <3 x float> <float 42.0, float 43.0, float 44.0>,
9126                              <3 x float> <float 23.0, float 24.0, float 25.0>,
9127                              metadata !"round.dynamic",
9128                              metadata !"fpexcept.strict") #0
9129  ret <3 x float> %atan2
9130}
9131
; <3 x double> constrained atan2 is scalarized into three atan2 libcalls; the
; SSE path returns the third element on the x87 stack (fldl), the AVX path
; inserts it into ymm0 with vinsertf128. CHECK lines are autogenerated.
define <3 x double> @constrained_vector_atan2_v3f64() #0 {
; CHECK-LABEL: constrained_vector_atan2_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0]
; CHECK-NEXT:    callq atan2@PLT
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0]
; CHECK-NEXT:    callq atan2@PLT
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [2.3199999999999999E+1,0.0E+0]
; CHECK-NEXT:    callq atan2@PLT
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_atan2_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0]
; AVX-NEXT:    callq atan2@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0]
; AVX-NEXT:    callq atan2@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [2.3199999999999999E+1,0.0E+0]
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq atan2@PLT
; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %atan2 = call <3 x double> @llvm.experimental.constrained.atan2.v3f64(
                          <3 x double> <double 42.0, double 42.1, double 42.2>,
                          <3 x double> <double 23.0, double 23.1, double 23.2>,
                          metadata !"round.dynamic",
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %atan2
}
9190
; <4 x double> constrained atan2 scalarizes to four atan2 libcalls; lanes are
; paired with unpcklpd into xmm0/xmm1 (SSE) or combined into one ymm0 via
; vinsertf128 (AVX). CHECK lines are autogenerated.
define <4 x double> @constrained_vector_atan2_v4f64() #0 {
; CHECK-LABEL: constrained_vector_atan2_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0]
; CHECK-NEXT:    callq atan2@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0]
; CHECK-NEXT:    callq atan2@PLT
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [2.3300000000000001E+1,0.0E+0]
; CHECK-NEXT:    callq atan2@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [2.3199999999999999E+1,0.0E+0]
; CHECK-NEXT:    callq atan2@PLT
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_atan2_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [2.3300000000000001E+1,0.0E+0]
; AVX-NEXT:    callq atan2@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [2.3199999999999999E+1,0.0E+0]
; AVX-NEXT:    callq atan2@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0]
; AVX-NEXT:    callq atan2@PLT
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0]
; AVX-NEXT:    callq atan2@PLT
; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %atan2 = call <4 x double> @llvm.experimental.constrained.atan2.v4f64(
                             <4 x double> <double 42.0, double 42.1,
                                           double 42.2, double 42.3>,
                             <4 x double> <double 23.0, double 23.1,
                                           double 23.2, double 23.3>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <4 x double> %atan2
}
9258
; <1 x float> constrained cosh lowers to a single coshf libcall; the push/pop
; of %rax only realigns the stack for the call. CHECK lines are autogenerated.
define <1 x float> @constrained_vector_cosh_v1f32() #0 {
; CHECK-LABEL: constrained_vector_cosh_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_cosh_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq coshf@PLT
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %cosh = call <1 x float> @llvm.experimental.constrained.cosh.v1f32(
                             <1 x float> <float 42.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <1 x float> %cosh
}
9286
; <2 x double> constrained cosh scalarizes to two cosh libcalls whose results
; are recombined with unpcklpd. CHECK lines are autogenerated.
define <2 x double> @constrained_vector_cosh_v2f64() #0 {
; CHECK-LABEL: constrained_vector_cosh_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    callq cosh@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    callq cosh@PLT
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_cosh_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    callq cosh@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    callq cosh@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %cosh = call <2 x double> @llvm.experimental.constrained.cosh.v2f64(
                             <2 x double> <double 42.0, double 42.1>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <2 x double> %cosh
}
9324
; <3 x float> constrained cosh scalarizes to three coshf libcalls; lanes are
; reassembled with unpcklps/unpcklpd (SSE) or vinsertps (AVX). CHECK lines are
; autogenerated.
define <3 x float> @constrained_vector_cosh_v3f32() #0 {
; CHECK-LABEL: constrained_vector_cosh_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_cosh_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq coshf@PLT
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq coshf@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq coshf@PLT
; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %cosh = call <3 x float> @llvm.experimental.constrained.cosh.v3f32(
                              <3 x float> <float 42.0, float 43.0, float 44.0>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %cosh
}
9373
; <3 x double> constrained cosh scalarizes to three cosh libcalls; the SSE path
; returns the third element on the x87 stack (fldl), the AVX path inserts it
; into ymm0 with vinsertf128. CHECK lines are autogenerated.
define <3 x double> @constrained_vector_cosh_v3f64() #0 {
; CHECK-LABEL: constrained_vector_cosh_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    callq cosh@PLT
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    callq cosh@PLT
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT:    callq cosh@PLT
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_cosh_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    callq cosh@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    callq cosh@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq cosh@PLT
; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %cosh = call <3 x double> @llvm.experimental.constrained.cosh.v3f64(
                          <3 x double> <double 42.0, double 42.1, double 42.2>,
                          metadata !"round.dynamic",
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %cosh
}
9425
; <4 x double> constrained cosh scalarizes to four cosh libcalls; lanes are
; paired with unpcklpd into xmm0/xmm1 (SSE) or combined into one ymm0 via
; vinsertf128 (AVX). CHECK lines are autogenerated.
define <4 x double> @constrained_vector_cosh_v4f64() #0 {
; CHECK-LABEL: constrained_vector_cosh_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    callq cosh@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    callq cosh@PLT
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
; CHECK-NEXT:    callq cosh@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT:    callq cosh@PLT
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_cosh_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
; AVX-NEXT:    callq cosh@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; AVX-NEXT:    callq cosh@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    callq cosh@PLT
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    callq cosh@PLT
; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %cosh = call <4 x double> @llvm.experimental.constrained.cosh.v4f64(
                             <4 x double> <double 42.0, double 42.1,
                                           double 42.2, double 42.3>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <4 x double> %cosh
}
9483
; <1 x float> constrained sinh lowers to a single sinhf libcall; the push/pop
; of %rax only realigns the stack for the call. CHECK lines are autogenerated.
define <1 x float> @constrained_vector_sinh_v1f32() #0 {
; CHECK-LABEL: constrained_vector_sinh_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sinh_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq sinhf@PLT
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %sinh = call <1 x float> @llvm.experimental.constrained.sinh.v1f32(
                             <1 x float> <float 42.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <1 x float> %sinh
}
9511
; <2 x double> constrained sinh scalarizes to two sinh libcalls whose results
; are recombined with unpcklpd. CHECK lines are autogenerated.
define <2 x double> @constrained_vector_sinh_v2f64() #0 {
; CHECK-LABEL: constrained_vector_sinh_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    callq sinh@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    callq sinh@PLT
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sinh_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    callq sinh@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    callq sinh@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %sinh = call <2 x double> @llvm.experimental.constrained.sinh.v2f64(
                             <2 x double> <double 42.0, double 42.1>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <2 x double> %sinh
}
9549
; <3 x float> constrained sinh scalarizes to three sinhf libcalls; lanes are
; reassembled with unpcklps/unpcklpd (SSE) or vinsertps (AVX). CHECK lines are
; autogenerated.
define <3 x float> @constrained_vector_sinh_v3f32() #0 {
; CHECK-LABEL: constrained_vector_sinh_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sinh_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq sinhf@PLT
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq sinhf@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq sinhf@PLT
; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %sinh = call <3 x float> @llvm.experimental.constrained.sinh.v3f32(
                              <3 x float> <float 42.0, float 43.0, float 44.0>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %sinh
}
9598
; <3 x double> constrained sinh scalarizes to three sinh libcalls; the SSE path
; returns the third element on the x87 stack (fldl), the AVX path inserts it
; into ymm0 with vinsertf128. CHECK lines are autogenerated.
define <3 x double> @constrained_vector_sinh_v3f64() #0 {
; CHECK-LABEL: constrained_vector_sinh_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    callq sinh@PLT
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    callq sinh@PLT
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT:    callq sinh@PLT
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sinh_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    callq sinh@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    callq sinh@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq sinh@PLT
; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %sinh = call <3 x double> @llvm.experimental.constrained.sinh.v3f64(
                          <3 x double> <double 42.0, double 42.1, double 42.2>,
                          metadata !"round.dynamic",
                          metadata !"fpexcept.strict") #0
  ret <3 x double> %sinh
}
9650
; <4 x double> constrained sinh scalarizes to four sinh libcalls; lanes are
; paired with unpcklpd into xmm0/xmm1 (SSE) or combined into one ymm0 via
; vinsertf128 (AVX). CHECK lines are autogenerated.
define <4 x double> @constrained_vector_sinh_v4f64() #0 {
; CHECK-LABEL: constrained_vector_sinh_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    callq sinh@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    callq sinh@PLT
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
; CHECK-NEXT:    callq sinh@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT:    callq sinh@PLT
; CHECK-NEXT:    movaps %xmm0, %xmm1
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_sinh_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
; AVX-NEXT:    callq sinh@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; AVX-NEXT:    callq sinh@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    callq sinh@PLT
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    callq sinh@PLT
; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %sinh = call <4 x double> @llvm.experimental.constrained.sinh.v4f64(
                             <4 x double> <double 42.0, double 42.1,
                                           double 42.2, double 42.3>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <4 x double> %sinh
}
9708
; <1 x float> constrained tanh lowers to a single tanhf libcall; the push/pop
; of %rax only realigns the stack for the call. CHECK lines are autogenerated.
define <1 x float> @constrained_vector_tanh_v1f32() #0 {
; CHECK-LABEL: constrained_vector_tanh_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_tanh_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq tanhf@PLT
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %tanh = call <1 x float> @llvm.experimental.constrained.tanh.v1f32(
                             <1 x float> <float 42.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <1 x float> %tanh
}
9736
; <2 x double> constrained tanh scalarizes to two tanh libcalls whose results
; are recombined with unpcklpd. CHECK lines are autogenerated.
define <2 x double> @constrained_vector_tanh_v2f64() #0 {
; CHECK-LABEL: constrained_vector_tanh_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT:    callq tanh@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    callq tanh@PLT
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_tanh_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT:    callq tanh@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    callq tanh@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %tanh = call <2 x double> @llvm.experimental.constrained.tanh.v2f64(
                             <2 x double> <double 42.0, double 42.1>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <2 x double> %tanh
}
9774
; <3 x float> constrained tanh scalarizes to three tanhf libcalls; lanes are
; reassembled with unpcklps/unpcklpd (SSE) or vinsertps (AVX). CHECK lines are
; autogenerated.
define <3 x float> @constrained_vector_tanh_v3f32() #0 {
; CHECK-LABEL: constrained_vector_tanh_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_tanh_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq tanhf@PLT
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq tanhf@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq tanhf@PLT
; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %tanh = call <3 x float> @llvm.experimental.constrained.tanh.v3f32(
                              <3 x float> <float 42.0, float 43.0, float 44.0>,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %tanh
}
9823
; Illegal-width <3 x double>: scalarized into three tanh libcalls.  On the
; SSE path the <3 x double> return convention puts the third element on the
; x87 stack (fldl from the spill slot, with a `wait` for strict-FP exception
; ordering) and the first two in xmm0/xmm1.  On the AVX path the result is
; a single ymm built with vunpcklpd + vinsertf128; vzeroupper is emitted
; before the last libcall while a ymm value is spilled live.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py; do not
; hand-edit them — regenerate instead.
9824define <3 x double> @constrained_vector_tanh_v3f64() #0 {
9825; CHECK-LABEL: constrained_vector_tanh_v3f64:
9826; CHECK:       # %bb.0: # %entry
9827; CHECK-NEXT:    subq $24, %rsp
9828; CHECK-NEXT:    .cfi_def_cfa_offset 32
9829; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
9830; CHECK-NEXT:    callq tanh@PLT
9831; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
9832; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
9833; CHECK-NEXT:    callq tanh@PLT
9834; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
9835; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
9836; CHECK-NEXT:    callq tanh@PLT
9837; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
9838; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
9839; CHECK-NEXT:    wait
9840; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
9841; CHECK-NEXT:    # xmm0 = mem[0],zero
9842; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
9843; CHECK-NEXT:    # xmm1 = mem[0],zero
9844; CHECK-NEXT:    addq $24, %rsp
9845; CHECK-NEXT:    .cfi_def_cfa_offset 8
9846; CHECK-NEXT:    retq
9847;
9848; AVX-LABEL: constrained_vector_tanh_v3f64:
9849; AVX:       # %bb.0: # %entry
9850; AVX-NEXT:    subq $40, %rsp
9851; AVX-NEXT:    .cfi_def_cfa_offset 48
9852; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
9853; AVX-NEXT:    callq tanh@PLT
9854; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
9855; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
9856; AVX-NEXT:    callq tanh@PLT
9857; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
9858; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
9859; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
9860; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
9861; AVX-NEXT:    vzeroupper
9862; AVX-NEXT:    callq tanh@PLT
9863; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
9864; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
9865; AVX-NEXT:    addq $40, %rsp
9866; AVX-NEXT:    .cfi_def_cfa_offset 8
9867; AVX-NEXT:    retq
9868entry:
9869  %tanh = call <3 x double> @llvm.experimental.constrained.tanh.v3f64(
9870                          <3 x double> <double 42.0, double 42.1, double 42.2>,
9871                          metadata !"round.dynamic",
9872                          metadata !"fpexcept.strict") #0
9873  ret <3 x double> %tanh
9874}
9875
; Double-width <4 x double>: scalarized into four tanh libcalls.  The SSE
; path returns the vector as two xmm halves (elements {0,1} in xmm0,
; {2,3} in xmm1) assembled with unpcklpd; the AVX path pairs elements with
; vunpcklpd and then joins the two 128-bit halves into ymm0 with
; vinsertf128.  Note the calls are issued high-element-first so the
; repacking can fold the spill-slot reloads.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py; do not
; hand-edit them — regenerate instead.
9876define <4 x double> @constrained_vector_tanh_v4f64() #0 {
9877; CHECK-LABEL: constrained_vector_tanh_v4f64:
9878; CHECK:       # %bb.0: # %entry
9879; CHECK-NEXT:    subq $40, %rsp
9880; CHECK-NEXT:    .cfi_def_cfa_offset 48
9881; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
9882; CHECK-NEXT:    callq tanh@PLT
9883; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
9884; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
9885; CHECK-NEXT:    callq tanh@PLT
9886; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
9887; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
9888; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
9889; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
9890; CHECK-NEXT:    callq tanh@PLT
9891; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
9892; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
9893; CHECK-NEXT:    callq tanh@PLT
9894; CHECK-NEXT:    movaps %xmm0, %xmm1
9895; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
9896; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
9897; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
9898; CHECK-NEXT:    addq $40, %rsp
9899; CHECK-NEXT:    .cfi_def_cfa_offset 8
9900; CHECK-NEXT:    retq
9901;
9902; AVX-LABEL: constrained_vector_tanh_v4f64:
9903; AVX:       # %bb.0: # %entry
9904; AVX-NEXT:    subq $40, %rsp
9905; AVX-NEXT:    .cfi_def_cfa_offset 48
9906; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
9907; AVX-NEXT:    callq tanh@PLT
9908; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
9909; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
9910; AVX-NEXT:    callq tanh@PLT
9911; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
9912; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
9913; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
9914; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
9915; AVX-NEXT:    callq tanh@PLT
9916; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
9917; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
9918; AVX-NEXT:    callq tanh@PLT
9919; AVX-NEXT:    vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
9920; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
9921; AVX-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
9922; AVX-NEXT:    addq $40, %rsp
9923; AVX-NEXT:    .cfi_def_cfa_offset 8
9924; AVX-NEXT:    retq
9925entry:
9926  %tanh = call <4 x double> @llvm.experimental.constrained.tanh.v4f64(
9927                             <4 x double> <double 42.0, double 42.1,
9928                                           double 42.2, double 42.3>,
9929                             metadata !"round.dynamic",
9930                             metadata !"fpexcept.strict") #0
9931  ret <4 x double> %tanh
9932}
9933
9934declare  <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, <16 x float>, metadata, metadata)
9935
9936attributes #0 = { strictfp }
9937
9938; Single width declarations
9939declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
9940declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
9941declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
9942declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
9943declare <2 x double> @llvm.experimental.constrained.frem.v2f64(<2 x double>, <2 x double>, metadata, metadata)
9944declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
9945declare <2 x double> @llvm.experimental.constrained.pow.v2f64(<2 x double>, <2 x double>, metadata, metadata)
9946declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32, metadata, metadata)
9947declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata)
9948declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata)
9949declare <2 x double> @llvm.experimental.constrained.tan.v2f64(<2 x double>, metadata, metadata)
9950declare <2 x double> @llvm.experimental.constrained.asin.v2f64(<2 x double>, metadata, metadata)
9951declare <2 x double> @llvm.experimental.constrained.acos.v2f64(<2 x double>, metadata, metadata)
9952declare <2 x double> @llvm.experimental.constrained.atan.v2f64(<2 x double>, metadata, metadata)
9953declare <2 x double> @llvm.experimental.constrained.sinh.v2f64(<2 x double>, metadata, metadata)
9954declare <2 x double> @llvm.experimental.constrained.cosh.v2f64(<2 x double>, metadata, metadata)
9955declare <2 x double> @llvm.experimental.constrained.tanh.v2f64(<2 x double>, metadata, metadata)
9956declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata)
9957declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata)
9958declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata)
9959declare <2 x double> @llvm.experimental.constrained.log10.v2f64(<2 x double>, metadata, metadata)
9960declare <2 x double> @llvm.experimental.constrained.log2.v2f64(<2 x double>, metadata, metadata)
9961declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata)
9962declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
9963declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata)
9964declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata)
9965declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32(<2 x float>, metadata)
9966declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float>, metadata)
9967declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double>, metadata)
9968declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double>, metadata)
9969declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32(<2 x float>, metadata)
9970declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float>, metadata)
9971declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double>, metadata)
9972declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double>, metadata)
9973declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
9974declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata)
9975declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata)
9976declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata)
9977declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata)
9978declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata)
9979declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32>, metadata, metadata)
9980declare <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i32(<2 x i32>, metadata, metadata)
9981declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64>, metadata, metadata)
9982declare <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i64(<2 x i64>, metadata, metadata)
9983declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32>, metadata, metadata)
9984declare <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i32(<2 x i32>, metadata, metadata)
9985declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64>, metadata, metadata)
9986declare <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i64(<2 x i64>, metadata, metadata)
9987
9988; Scalar width declarations
9989declare <1 x float> @llvm.experimental.constrained.fadd.v1f32(<1 x float>, <1 x float>, metadata, metadata)
9990declare <1 x float> @llvm.experimental.constrained.fsub.v1f32(<1 x float>, <1 x float>, metadata, metadata)
9991declare <1 x float> @llvm.experimental.constrained.fmul.v1f32(<1 x float>, <1 x float>, metadata, metadata)
9992declare <1 x float> @llvm.experimental.constrained.fdiv.v1f32(<1 x float>, <1 x float>, metadata, metadata)
9993declare <1 x float> @llvm.experimental.constrained.frem.v1f32(<1 x float>, <1 x float>, metadata, metadata)
9994declare <1 x float> @llvm.experimental.constrained.sqrt.v1f32(<1 x float>, metadata, metadata)
9995declare <1 x float> @llvm.experimental.constrained.pow.v1f32(<1 x float>, <1 x float>, metadata, metadata)
9996declare <1 x float> @llvm.experimental.constrained.powi.v1f32(<1 x float>, i32, metadata, metadata)
9997declare <1 x float> @llvm.experimental.constrained.sin.v1f32(<1 x float>, metadata, metadata)
9998declare <1 x float> @llvm.experimental.constrained.cos.v1f32(<1 x float>, metadata, metadata)
9999declare <1 x float> @llvm.experimental.constrained.tan.v1f32(<1 x float>, metadata, metadata)
10000declare <1 x float> @llvm.experimental.constrained.asin.v1f32(<1 x float>, metadata, metadata)
10001declare <1 x float> @llvm.experimental.constrained.acos.v1f32(<1 x float>, metadata, metadata)
10002declare <1 x float> @llvm.experimental.constrained.atan.v1f32(<1 x float>, metadata, metadata)
10003declare <1 x float> @llvm.experimental.constrained.sinh.v1f32(<1 x float>, metadata, metadata)
10004declare <1 x float> @llvm.experimental.constrained.cosh.v1f32(<1 x float>, metadata, metadata)
10005declare <1 x float> @llvm.experimental.constrained.tanh.v1f32(<1 x float>, metadata, metadata)
10006declare <1 x float> @llvm.experimental.constrained.exp.v1f32(<1 x float>, metadata, metadata)
10007declare <1 x float> @llvm.experimental.constrained.exp2.v1f32(<1 x float>, metadata, metadata)
10008declare <1 x float> @llvm.experimental.constrained.log.v1f32(<1 x float>, metadata, metadata)
10009declare <1 x float> @llvm.experimental.constrained.log10.v1f32(<1 x float>, metadata, metadata)
10010declare <1 x float> @llvm.experimental.constrained.log2.v1f32(<1 x float>, metadata, metadata)
10011declare <1 x float> @llvm.experimental.constrained.rint.v1f32(<1 x float>, metadata, metadata)
10012declare <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(<1 x float>, metadata, metadata)
10013declare <1 x float> @llvm.experimental.constrained.maxnum.v1f32(<1 x float>, <1 x float>, metadata)
10014declare <1 x float> @llvm.experimental.constrained.minnum.v1f32(<1 x float>, <1 x float>, metadata)
10015declare <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f32(<1 x float>, metadata)
10016declare <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f32(<1 x float>, metadata)
10017declare <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f64(<1 x double>, metadata)
10018declare <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f64(<1 x double>, metadata)
10019declare <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f32(<1 x float>, metadata)
10020declare <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f32(<1 x float>, metadata)
10021declare <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f64(<1 x double>, metadata)
10022declare <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f64(<1 x double>, metadata)
10023declare <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(<1 x double>, metadata, metadata)
10024declare <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(<1 x float>, metadata)
10025declare <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float>, metadata)
10026declare <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float>, metadata)
10027declare <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float>, metadata)
10028declare <1 x float> @llvm.experimental.constrained.trunc.v1f32(<1 x float>, metadata)
10029declare <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i32(<1 x i32>, metadata, metadata)
10030declare <1 x float> @llvm.experimental.constrained.sitofp.v1f32.v1i32(<1 x i32>, metadata, metadata)
10031declare <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i64(<1 x i64>, metadata, metadata)
10032declare <1 x float> @llvm.experimental.constrained.sitofp.v1f32.v1i64(<1 x i64>, metadata, metadata)
10033declare <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i32(<1 x i32>, metadata, metadata)
10034declare <1 x float> @llvm.experimental.constrained.uitofp.v1f32.v1i32(<1 x i32>, metadata, metadata)
10035declare <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i64(<1 x i64>, metadata, metadata)
10036declare <1 x float> @llvm.experimental.constrained.uitofp.v1f32.v1i64(<1 x i64>, metadata, metadata)
10037
10038; Illegal width declarations
10039declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata)
10040declare <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double>, <3 x double>, metadata, metadata)
10041declare <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float>, <3 x float>, metadata, metadata)
10042declare <3 x double> @llvm.experimental.constrained.fsub.v3f64(<3 x double>, <3 x double>, metadata, metadata)
10043declare <3 x float> @llvm.experimental.constrained.fmul.v3f32(<3 x float>, <3 x float>, metadata, metadata)
10044declare <3 x double> @llvm.experimental.constrained.fmul.v3f64(<3 x double>, <3 x double>, metadata, metadata)
10045declare <3 x float> @llvm.experimental.constrained.fdiv.v3f32(<3 x float>, <3 x float>, metadata, metadata)
10046declare <3 x double> @llvm.experimental.constrained.fdiv.v3f64(<3 x double>, <3 x double>, metadata, metadata)
10047declare <3 x float> @llvm.experimental.constrained.frem.v3f32(<3 x float>, <3 x float>, metadata, metadata)
10048declare <3 x double> @llvm.experimental.constrained.frem.v3f64(<3 x double>, <3 x double>, metadata, metadata)
10049declare <3 x float> @llvm.experimental.constrained.sqrt.v3f32(<3 x float>, metadata, metadata)
10050declare <3 x double> @llvm.experimental.constrained.sqrt.v3f64(<3 x double>, metadata, metadata)
10051declare <3 x float> @llvm.experimental.constrained.pow.v3f32(<3 x float>, <3 x float>, metadata, metadata)
10052declare <3 x double> @llvm.experimental.constrained.pow.v3f64(<3 x double>, <3 x double>, metadata, metadata)
10053declare <3 x float> @llvm.experimental.constrained.powi.v3f32(<3 x float>, i32, metadata, metadata)
10054declare <3 x double> @llvm.experimental.constrained.powi.v3f64(<3 x double>, i32, metadata, metadata)
10055declare <3 x float> @llvm.experimental.constrained.sin.v3f32(<3 x float>, metadata, metadata)
10056declare <3 x double> @llvm.experimental.constrained.sin.v3f64(<3 x double>, metadata, metadata)
10057declare <3 x float> @llvm.experimental.constrained.cos.v3f32(<3 x float>, metadata, metadata)
10058declare <3 x double> @llvm.experimental.constrained.cos.v3f64(<3 x double>, metadata, metadata)
10059declare <3 x float> @llvm.experimental.constrained.tan.v3f32(<3 x float>, metadata, metadata)
10060declare <3 x double> @llvm.experimental.constrained.tan.v3f64(<3 x double>, metadata, metadata)
10061declare <3 x float> @llvm.experimental.constrained.asin.v3f32(<3 x float>, metadata, metadata)
10062declare <3 x double> @llvm.experimental.constrained.asin.v3f64(<3 x double>, metadata, metadata)
10063declare <3 x float> @llvm.experimental.constrained.acos.v3f32(<3 x float>, metadata, metadata)
10064declare <3 x double> @llvm.experimental.constrained.acos.v3f64(<3 x double>, metadata, metadata)
10065declare <3 x float> @llvm.experimental.constrained.atan.v3f32(<3 x float>, metadata, metadata)
10066declare <3 x double> @llvm.experimental.constrained.atan.v3f64(<3 x double>, metadata, metadata)
10067declare <3 x float> @llvm.experimental.constrained.sinh.v3f32(<3 x float>, metadata, metadata)
10068declare <3 x double> @llvm.experimental.constrained.sinh.v3f64(<3 x double>, metadata, metadata)
10069declare <3 x float> @llvm.experimental.constrained.cosh.v3f32(<3 x float>, metadata, metadata)
10070declare <3 x double> @llvm.experimental.constrained.cosh.v3f64(<3 x double>, metadata, metadata)
10071declare <3 x float> @llvm.experimental.constrained.tanh.v3f32(<3 x float>, metadata, metadata)
10072declare <3 x double> @llvm.experimental.constrained.tanh.v3f64(<3 x double>, metadata, metadata)
10073declare <3 x float> @llvm.experimental.constrained.exp.v3f32(<3 x float>, metadata, metadata)
10074declare <3 x double> @llvm.experimental.constrained.exp.v3f64(<3 x double>, metadata, metadata)
10075declare <3 x float> @llvm.experimental.constrained.exp2.v3f32(<3 x float>, metadata, metadata)
10076declare <3 x double> @llvm.experimental.constrained.exp2.v3f64(<3 x double>, metadata, metadata)
10077declare <3 x float> @llvm.experimental.constrained.log.v3f32(<3 x float>, metadata, metadata)
10078declare <3 x double> @llvm.experimental.constrained.log.v3f64(<3 x double>, metadata, metadata)
10079declare <3 x float> @llvm.experimental.constrained.log10.v3f32(<3 x float>, metadata, metadata)
10080declare <3 x double> @llvm.experimental.constrained.log10.v3f64(<3 x double>, metadata, metadata)
10081declare <3 x float> @llvm.experimental.constrained.log2.v3f32(<3 x float>, metadata, metadata)
10082declare <3 x double> @llvm.experimental.constrained.log2.v3f64(<3 x double>, metadata, metadata)
10083declare <3 x float> @llvm.experimental.constrained.rint.v3f32(<3 x float>, metadata, metadata)
10084declare <3 x double> @llvm.experimental.constrained.rint.v3f64(<3 x double>, metadata, metadata)
10085declare <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(<3 x float>, metadata, metadata)
10086declare <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(<3 x double>, metadata, metadata)
10087declare <3 x float> @llvm.experimental.constrained.maxnum.v3f32(<3 x float>, <3 x float>, metadata)
10088declare <3 x double> @llvm.experimental.constrained.maxnum.v3f64(<3 x double>, <3 x double>, metadata)
10089declare <3 x float> @llvm.experimental.constrained.minnum.v3f32(<3 x float>, <3 x float>, metadata)
10090declare <3 x double> @llvm.experimental.constrained.minnum.v3f64(<3 x double>, <3 x double>, metadata)
10091declare <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f32(<3 x float>, metadata)
10092declare <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f32(<3 x float>, metadata)
10093declare <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f64(<3 x double>, metadata)
10094declare <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f64(<3 x double>, metadata)
10095declare <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f32(<3 x float>, metadata)
10096declare <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f32(<3 x float>, metadata)
10097declare <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f64(<3 x double>, metadata)
10098declare <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f64(<3 x double>, metadata)
10099declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata)
10100declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata)
10101declare <3 x float> @llvm.experimental.constrained.ceil.v3f32(<3 x float>, metadata)
10102declare <3 x double> @llvm.experimental.constrained.ceil.v3f64(<3 x double>, metadata)
10103declare <3 x float> @llvm.experimental.constrained.floor.v3f32(<3 x float>, metadata)
10104declare <3 x double> @llvm.experimental.constrained.floor.v3f64(<3 x double>, metadata)
10105declare <3 x float> @llvm.experimental.constrained.round.v3f32(<3 x float>, metadata)
10106declare <3 x double> @llvm.experimental.constrained.round.v3f64(<3 x double>, metadata)
10107declare <3 x float> @llvm.experimental.constrained.trunc.v3f32(<3 x float>, metadata)
10108declare <3 x double> @llvm.experimental.constrained.trunc.v3f64(<3 x double>, metadata)
10109declare <3 x double> @llvm.experimental.constrained.sitofp.v3f64.v3i32(<3 x i32>, metadata, metadata)
10110declare <3 x float> @llvm.experimental.constrained.sitofp.v3f32.v3i32(<3 x i32>, metadata, metadata)
10111declare <3 x double> @llvm.experimental.constrained.sitofp.v3f64.v3i64(<3 x i64>, metadata, metadata)
10112declare <3 x float> @llvm.experimental.constrained.sitofp.v3f32.v3i64(<3 x i64>, metadata, metadata)
10113declare <3 x double> @llvm.experimental.constrained.uitofp.v3f64.v3i32(<3 x i32>, metadata, metadata)
10114declare <3 x float> @llvm.experimental.constrained.uitofp.v3f32.v3i32(<3 x i32>, metadata, metadata)
10115declare <3 x double> @llvm.experimental.constrained.uitofp.v3f64.v3i64(<3 x i64>, metadata, metadata)
10116declare <3 x float> @llvm.experimental.constrained.uitofp.v3f32.v3i64(<3 x i64>, metadata, metadata)
10117
10118; Double width declarations
10119declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata)
10120declare <4 x double> @llvm.experimental.constrained.fsub.v4f64(<4 x double>, <4 x double>, metadata, metadata)
10121declare <4 x double> @llvm.experimental.constrained.fmul.v4f64(<4 x double>, <4 x double>, metadata, metadata)
10122declare <4 x double> @llvm.experimental.constrained.fdiv.v4f64(<4 x double>, <4 x double>, metadata, metadata)
10123declare <4 x double> @llvm.experimental.constrained.frem.v4f64(<4 x double>, <4 x double>, metadata, metadata)
10124declare <4 x double> @llvm.experimental.constrained.sqrt.v4f64(<4 x double>, metadata, metadata)
10125declare <4 x double> @llvm.experimental.constrained.pow.v4f64(<4 x double>, <4 x double>, metadata, metadata)
10126declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32, metadata, metadata)
10127declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata)
10128declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata)
10129declare <4 x double> @llvm.experimental.constrained.tan.v4f64(<4 x double>, metadata, metadata)
10130declare <4 x double> @llvm.experimental.constrained.asin.v4f64(<4 x double>, metadata, metadata)
10131declare <4 x double> @llvm.experimental.constrained.acos.v4f64(<4 x double>, metadata, metadata)
10132declare <4 x double> @llvm.experimental.constrained.atan.v4f64(<4 x double>, metadata, metadata)
10133declare <4 x double> @llvm.experimental.constrained.atan2.v4f64(<4 x double>, <4 x double>, metadata, metadata)
10134declare <4 x double> @llvm.experimental.constrained.sinh.v4f64(<4 x double>, metadata, metadata)
10135declare <4 x double> @llvm.experimental.constrained.cosh.v4f64(<4 x double>, metadata, metadata)
10136declare <4 x double> @llvm.experimental.constrained.tanh.v4f64(<4 x double>, metadata, metadata)
10137declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata)
10138declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata)
10139declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata)
10140declare <4 x double> @llvm.experimental.constrained.log10.v4f64(<4 x double>, metadata, metadata)
10141declare <4 x double> @llvm.experimental.constrained.log2.v4f64(<4 x double>, metadata, metadata)
10142declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, metadata, metadata)
10143declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)
10144declare <4 x double> @llvm.experimental.constrained.maxnum.v4f64(<4 x double>, <4 x double>, metadata)
10145declare <4 x double> @llvm.experimental.constrained.minnum.v4f64(<4 x double>, <4 x double>, metadata)
10146declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float>, metadata)
10147declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(<4 x float>, metadata)
10148declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f64(<4 x double>, metadata)
10149declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64(<4 x double>, metadata)
10150declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float>, metadata)
10151declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(<4 x float>, metadata)
10152declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64(<4 x double>, metadata)
10153declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f64(<4 x double>, metadata)
10154declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)
10155declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata)
10156declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata)
10157declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata)
10158declare <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double>, metadata)
10159declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, metadata)
10160declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32>, metadata, metadata)
10161declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32>, metadata, metadata)
10162declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64>, metadata, metadata)
10163declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)
10164declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32>, metadata, metadata)
10165declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32>, metadata, metadata)
10166declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64>, metadata, metadata)
10167declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)
10168
10169