xref: /llvm-project/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll (revision a2a0089ac3a5781ba74d4d319c87c9e8b46d4eda)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
3
4;
5; 32-bit float to unsigned integer
6;
7
; Declarations of the llvm.fptoui.sat overloads that take <4 x float>, one per
; result element type used by the functions in this section
; (i1, i8, i16, i32, i64, i128).
8declare <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float>)
9declare <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float>)
10declare <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float>)
11declare <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float>)
12declare <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float>)
13declare <4 x i128> @llvm.fptoui.sat.v4i128.v4f32(<4 x float>)
14
; <4 x float> -> <4 x i1> through llvm.fptoui.sat; the result is returned as-is.
; The expected assembly handles one lane at a time: maxss against a zeroed
; register (%xmm2), minss against the constant 1.0 (%xmm3), then cvttss2si.
; The four scalar results are rebuilt into a vector with punpckldq/punpcklqdq.
15define <4 x i1> @test_unsigned_v4i1_v4f32(<4 x float> %f) nounwind {
16; CHECK-LABEL: test_unsigned_v4i1_v4f32:
17; CHECK:       # %bb.0:
18; CHECK-NEXT:    movaps %xmm0, %xmm1
19; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
20; CHECK-NEXT:    xorps %xmm2, %xmm2
21; CHECK-NEXT:    maxss %xmm2, %xmm1
22; CHECK-NEXT:    movss {{.*#+}} xmm3 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
23; CHECK-NEXT:    minss %xmm3, %xmm1
24; CHECK-NEXT:    cvttss2si %xmm1, %eax
25; CHECK-NEXT:    movd %eax, %xmm1
26; CHECK-NEXT:    movaps %xmm0, %xmm4
27; CHECK-NEXT:    unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
28; CHECK-NEXT:    maxss %xmm2, %xmm4
29; CHECK-NEXT:    minss %xmm3, %xmm4
30; CHECK-NEXT:    cvttss2si %xmm4, %eax
31; CHECK-NEXT:    movd %eax, %xmm4
32; CHECK-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
33; CHECK-NEXT:    movaps %xmm0, %xmm1
34; CHECK-NEXT:    maxss %xmm2, %xmm1
35; CHECK-NEXT:    minss %xmm3, %xmm1
36; CHECK-NEXT:    cvttss2si %xmm1, %eax
37; CHECK-NEXT:    movd %eax, %xmm1
38; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
39; CHECK-NEXT:    maxss %xmm2, %xmm0
40; CHECK-NEXT:    minss %xmm3, %xmm0
41; CHECK-NEXT:    cvttss2si %xmm0, %eax
42; CHECK-NEXT:    movd %eax, %xmm0
43; CHECK-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
44; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0]
45; CHECK-NEXT:    movdqa %xmm1, %xmm0
46; CHECK-NEXT:    retq
47  %x = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> %f)
48  ret <4 x i1> %x
49}
50
; <4 x float> -> <4 x i8> through llvm.fptoui.sat; the result is returned as-is.
; Expected assembly, per lane: maxss against zero, minss against 255.0
; (2.55E+2), then cvttss2si. Lanes 0-2 are masked with movzbl; the four bytes
; are packed into one 32-bit GPR with shll $8/$16/$24 + orl and moved into
; %xmm0 with a single movd.
51define <4 x i8> @test_unsigned_v4i8_v4f32(<4 x float> %f) nounwind {
52; CHECK-LABEL: test_unsigned_v4i8_v4f32:
53; CHECK:       # %bb.0:
54; CHECK-NEXT:    xorps %xmm1, %xmm1
55; CHECK-NEXT:    xorps %xmm3, %xmm3
56; CHECK-NEXT:    maxss %xmm0, %xmm3
57; CHECK-NEXT:    movss {{.*#+}} xmm2 = [2.55E+2,0.0E+0,0.0E+0,0.0E+0]
58; CHECK-NEXT:    movaps %xmm2, %xmm4
59; CHECK-NEXT:    minss %xmm3, %xmm4
60; CHECK-NEXT:    cvttss2si %xmm4, %eax
61; CHECK-NEXT:    movzbl %al, %eax
62; CHECK-NEXT:    movaps %xmm0, %xmm3
63; CHECK-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[1,1]
64; CHECK-NEXT:    xorps %xmm4, %xmm4
65; CHECK-NEXT:    maxss %xmm3, %xmm4
66; CHECK-NEXT:    movaps %xmm2, %xmm3
67; CHECK-NEXT:    minss %xmm4, %xmm3
68; CHECK-NEXT:    cvttss2si %xmm3, %ecx
69; CHECK-NEXT:    movzbl %cl, %ecx
70; CHECK-NEXT:    shll $8, %ecx
71; CHECK-NEXT:    orl %eax, %ecx
72; CHECK-NEXT:    movaps %xmm0, %xmm3
73; CHECK-NEXT:    unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
74; CHECK-NEXT:    xorps %xmm4, %xmm4
75; CHECK-NEXT:    maxss %xmm3, %xmm4
76; CHECK-NEXT:    movaps %xmm2, %xmm3
77; CHECK-NEXT:    minss %xmm4, %xmm3
78; CHECK-NEXT:    cvttss2si %xmm3, %eax
79; CHECK-NEXT:    movzbl %al, %eax
80; CHECK-NEXT:    shll $16, %eax
81; CHECK-NEXT:    orl %ecx, %eax
82; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
83; CHECK-NEXT:    maxss %xmm0, %xmm1
84; CHECK-NEXT:    minss %xmm1, %xmm2
85; CHECK-NEXT:    cvttss2si %xmm2, %ecx
86; CHECK-NEXT:    shll $24, %ecx
87; CHECK-NEXT:    orl %eax, %ecx
88; CHECK-NEXT:    movd %ecx, %xmm0
89; CHECK-NEXT:    retq
90  %x = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f32(<4 x float> %f)
91  ret <4 x i8> %x
92}
93
; <4 x float> -> <4 x i16> through llvm.fptoui.sat; the result is returned as-is.
; Expected assembly, per lane: maxss against zero, minss against 65535.0
; (6.5535E+4), then cvttss2si. Lane 0 enters the result vector via movd and
; lanes 1-3 are inserted with pinsrw $1/$2/$3.
94define <4 x i16> @test_unsigned_v4i16_v4f32(<4 x float> %f) nounwind {
95; CHECK-LABEL: test_unsigned_v4i16_v4f32:
96; CHECK:       # %bb.0:
97; CHECK-NEXT:    movaps %xmm0, %xmm1
98; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
99; CHECK-NEXT:    xorps %xmm2, %xmm2
100; CHECK-NEXT:    xorps %xmm3, %xmm3
101; CHECK-NEXT:    maxss %xmm1, %xmm3
102; CHECK-NEXT:    movss {{.*#+}} xmm4 = [6.5535E+4,0.0E+0,0.0E+0,0.0E+0]
103; CHECK-NEXT:    movaps %xmm4, %xmm1
104; CHECK-NEXT:    minss %xmm3, %xmm1
105; CHECK-NEXT:    cvttss2si %xmm1, %eax
106; CHECK-NEXT:    xorps %xmm1, %xmm1
107; CHECK-NEXT:    maxss %xmm0, %xmm1
108; CHECK-NEXT:    movaps %xmm4, %xmm3
109; CHECK-NEXT:    minss %xmm1, %xmm3
110; CHECK-NEXT:    cvttss2si %xmm3, %ecx
111; CHECK-NEXT:    movd %ecx, %xmm1
112; CHECK-NEXT:    pinsrw $1, %eax, %xmm1
113; CHECK-NEXT:    movaps %xmm0, %xmm3
114; CHECK-NEXT:    unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
115; CHECK-NEXT:    xorps %xmm5, %xmm5
116; CHECK-NEXT:    maxss %xmm3, %xmm5
117; CHECK-NEXT:    movaps %xmm4, %xmm3
118; CHECK-NEXT:    minss %xmm5, %xmm3
119; CHECK-NEXT:    cvttss2si %xmm3, %eax
120; CHECK-NEXT:    pinsrw $2, %eax, %xmm1
121; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
122; CHECK-NEXT:    maxss %xmm0, %xmm2
123; CHECK-NEXT:    minss %xmm2, %xmm4
124; CHECK-NEXT:    cvttss2si %xmm4, %eax
125; CHECK-NEXT:    pinsrw $3, %eax, %xmm1
126; CHECK-NEXT:    movdqa %xmm1, %xmm0
127; CHECK-NEXT:    retq
128  %x = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> %f)
129  ret <4 x i16> %x
130}
131
; <4 x float> -> <4 x i32> through llvm.fptoui.sat; the result is returned as-is.
; Expected assembly, per lane: cvttss2si into the 64-bit register %rdx, then
; two compare/select steps on the 32-bit half: ucomiss against zero + cmovbl
; picks 0 (%eax), ucomiss against 4.29496704E+9 + cmoval picks -1 (%ecx).
; The lanes are recombined with punpckldq/punpcklqdq.
132define <4 x i32> @test_unsigned_v4i32_v4f32(<4 x float> %f) nounwind {
133; CHECK-LABEL: test_unsigned_v4i32_v4f32:
134; CHECK:       # %bb.0:
135; CHECK-NEXT:    movaps %xmm0, %xmm1
136; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
137; CHECK-NEXT:    cvttss2si %xmm1, %rdx
138; CHECK-NEXT:    xorl %eax, %eax
139; CHECK-NEXT:    xorps %xmm2, %xmm2
140; CHECK-NEXT:    ucomiss %xmm2, %xmm1
141; CHECK-NEXT:    cmovbl %eax, %edx
142; CHECK-NEXT:    movss {{.*#+}} xmm3 = [4.29496704E+9,0.0E+0,0.0E+0,0.0E+0]
143; CHECK-NEXT:    ucomiss %xmm3, %xmm1
144; CHECK-NEXT:    movl $-1, %ecx
145; CHECK-NEXT:    cmoval %ecx, %edx
146; CHECK-NEXT:    movd %edx, %xmm1
147; CHECK-NEXT:    movaps %xmm0, %xmm4
148; CHECK-NEXT:    unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
149; CHECK-NEXT:    cvttss2si %xmm4, %rdx
150; CHECK-NEXT:    ucomiss %xmm2, %xmm4
151; CHECK-NEXT:    cmovbl %eax, %edx
152; CHECK-NEXT:    ucomiss %xmm3, %xmm4
153; CHECK-NEXT:    cmoval %ecx, %edx
154; CHECK-NEXT:    movd %edx, %xmm4
155; CHECK-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
156; CHECK-NEXT:    cvttss2si %xmm0, %rdx
157; CHECK-NEXT:    ucomiss %xmm2, %xmm0
158; CHECK-NEXT:    cmovbl %eax, %edx
159; CHECK-NEXT:    ucomiss %xmm3, %xmm0
160; CHECK-NEXT:    cmoval %ecx, %edx
161; CHECK-NEXT:    movd %edx, %xmm1
162; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
163; CHECK-NEXT:    cvttss2si %xmm0, %rdx
164; CHECK-NEXT:    ucomiss %xmm2, %xmm0
165; CHECK-NEXT:    cmovbl %eax, %edx
166; CHECK-NEXT:    ucomiss %xmm3, %xmm0
167; CHECK-NEXT:    cmoval %ecx, %edx
168; CHECK-NEXT:    movd %edx, %xmm0
169; CHECK-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
170; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0]
171; CHECK-NEXT:    movdqa %xmm1, %xmm0
172; CHECK-NEXT:    retq
173  %x = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %f)
174  ret <4 x i32> %x
175}
176
; <4 x float> -> <4 x i64> through llvm.fptoui.sat; the result is returned as-is.
; Expected assembly, per lane: two cvttss2si conversions (the lane itself and
; the lane minus 9.22337203E+18) merged with sarq $63 / andq / orq. Then
; ucomiss against zero + cmovbq picks 0, and ucomiss against 1.8446743E+19 +
; cmovaq picks -1. The 64-bit results are paired with punpcklqdq and left in
; %xmm0 (copied from %xmm2) and %xmm1.
177define <4 x i64> @test_unsigned_v4i64_v4f32(<4 x float> %f) nounwind {
178; CHECK-LABEL: test_unsigned_v4i64_v4f32:
179; CHECK:       # %bb.0:
180; CHECK-NEXT:    movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
181; CHECK-NEXT:    movaps %xmm0, %xmm2
182; CHECK-NEXT:    subss %xmm1, %xmm2
183; CHECK-NEXT:    cvttss2si %xmm2, %rax
184; CHECK-NEXT:    cvttss2si %xmm0, %rcx
185; CHECK-NEXT:    movq %rcx, %rdx
186; CHECK-NEXT:    sarq $63, %rdx
187; CHECK-NEXT:    andq %rax, %rdx
188; CHECK-NEXT:    orq %rcx, %rdx
189; CHECK-NEXT:    xorl %eax, %eax
190; CHECK-NEXT:    xorps %xmm3, %xmm3
191; CHECK-NEXT:    ucomiss %xmm3, %xmm0
192; CHECK-NEXT:    cmovbq %rax, %rdx
193; CHECK-NEXT:    movss {{.*#+}} xmm4 = [1.8446743E+19,0.0E+0,0.0E+0,0.0E+0]
194; CHECK-NEXT:    ucomiss %xmm4, %xmm0
195; CHECK-NEXT:    movq $-1, %rcx
196; CHECK-NEXT:    cmovaq %rcx, %rdx
197; CHECK-NEXT:    movq %rdx, %xmm2
198; CHECK-NEXT:    movaps %xmm0, %xmm5
199; CHECK-NEXT:    shufps {{.*#+}} xmm5 = xmm5[1,1],xmm0[1,1]
200; CHECK-NEXT:    movaps %xmm5, %xmm6
201; CHECK-NEXT:    subss %xmm1, %xmm6
202; CHECK-NEXT:    cvttss2si %xmm6, %rdx
203; CHECK-NEXT:    cvttss2si %xmm5, %rsi
204; CHECK-NEXT:    movq %rsi, %rdi
205; CHECK-NEXT:    sarq $63, %rdi
206; CHECK-NEXT:    andq %rdx, %rdi
207; CHECK-NEXT:    orq %rsi, %rdi
208; CHECK-NEXT:    ucomiss %xmm3, %xmm5
209; CHECK-NEXT:    cmovbq %rax, %rdi
210; CHECK-NEXT:    ucomiss %xmm4, %xmm5
211; CHECK-NEXT:    cmovaq %rcx, %rdi
212; CHECK-NEXT:    movq %rdi, %xmm5
213; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0]
214; CHECK-NEXT:    movaps %xmm0, %xmm5
215; CHECK-NEXT:    shufps {{.*#+}} xmm5 = xmm5[3,3],xmm0[3,3]
216; CHECK-NEXT:    movaps %xmm5, %xmm6
217; CHECK-NEXT:    subss %xmm1, %xmm6
218; CHECK-NEXT:    cvttss2si %xmm6, %rdx
219; CHECK-NEXT:    cvttss2si %xmm5, %rsi
220; CHECK-NEXT:    movq %rsi, %rdi
221; CHECK-NEXT:    sarq $63, %rdi
222; CHECK-NEXT:    andq %rdx, %rdi
223; CHECK-NEXT:    orq %rsi, %rdi
224; CHECK-NEXT:    ucomiss %xmm3, %xmm5
225; CHECK-NEXT:    cmovbq %rax, %rdi
226; CHECK-NEXT:    ucomiss %xmm4, %xmm5
227; CHECK-NEXT:    cmovaq %rcx, %rdi
228; CHECK-NEXT:    movq %rdi, %xmm5
229; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
230; CHECK-NEXT:    movaps %xmm0, %xmm6
231; CHECK-NEXT:    subss %xmm1, %xmm6
232; CHECK-NEXT:    cvttss2si %xmm6, %rdx
233; CHECK-NEXT:    cvttss2si %xmm0, %rsi
234; CHECK-NEXT:    movq %rsi, %rdi
235; CHECK-NEXT:    sarq $63, %rdi
236; CHECK-NEXT:    andq %rdx, %rdi
237; CHECK-NEXT:    orq %rsi, %rdi
238; CHECK-NEXT:    ucomiss %xmm3, %xmm0
239; CHECK-NEXT:    cmovbq %rax, %rdi
240; CHECK-NEXT:    ucomiss %xmm4, %xmm0
241; CHECK-NEXT:    cmovaq %rcx, %rdi
242; CHECK-NEXT:    movq %rdi, %xmm1
243; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
244; CHECK-NEXT:    movdqa %xmm2, %xmm0
245; CHECK-NEXT:    retq
246  %x = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> %f)
247  ret <4 x i64> %x
248}
249
; <4 x float> -> <4 x i128> through llvm.fptoui.sat; the result is returned as-is.
; Expected assembly: each lane is converted by a call to __fixunssfti. The
; two 64-bit result halves are then overridden with 0 (cmovbq) or -1 (cmovaq)
; after ucomiss compares against zero / constant-pool values. The input vector
; and intermediate lanes are spilled around the calls. The eight halves are
; stored at offsets 0..56 from %rbx, which holds the incoming %rdi, and that
; pointer is returned in %rax.
250define <4 x i128> @test_unsigned_v4i128_v4f32(<4 x float> %f) nounwind {
251; CHECK-LABEL: test_unsigned_v4i128_v4f32:
252; CHECK:       # %bb.0:
253; CHECK-NEXT:    pushq %rbp
254; CHECK-NEXT:    pushq %r15
255; CHECK-NEXT:    pushq %r14
256; CHECK-NEXT:    pushq %r13
257; CHECK-NEXT:    pushq %r12
258; CHECK-NEXT:    pushq %rbx
259; CHECK-NEXT:    subq $56, %rsp
260; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
261; CHECK-NEXT:    movq %rdi, %rbx
262; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
263; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
264; CHECK-NEXT:    callq __fixunssfti@PLT
265; CHECK-NEXT:    movq %rdx, %r15
266; CHECK-NEXT:    xorl %r14d, %r14d
267; CHECK-NEXT:    xorps %xmm0, %xmm0
268; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
269; CHECK-NEXT:    ucomiss %xmm0, %xmm1
270; CHECK-NEXT:    cmovbq %r14, %r15
271; CHECK-NEXT:    cmovbq %r14, %rax
272; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
273; CHECK-NEXT:    movq $-1, %rbp
274; CHECK-NEXT:    cmovaq %rbp, %rax
275; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
276; CHECK-NEXT:    cmovaq %rbp, %r15
277; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
278; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
279; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
280; CHECK-NEXT:    callq __fixunssfti@PLT
281; CHECK-NEXT:    movq %rax, %r12
282; CHECK-NEXT:    movq %rdx, %r13
283; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
284; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
285; CHECK-NEXT:    cmovbq %r14, %r13
286; CHECK-NEXT:    cmovbq %r14, %r12
287; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
288; CHECK-NEXT:    cmovaq %rbp, %r12
289; CHECK-NEXT:    cmovaq %rbp, %r13
290; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
291; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
292; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
293; CHECK-NEXT:    callq __fixunssfti@PLT
294; CHECK-NEXT:    movq %rax, %rbp
295; CHECK-NEXT:    movq %rdx, %r14
296; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
297; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
298; CHECK-NEXT:    movl $0, %eax
299; CHECK-NEXT:    cmovbq %rax, %r14
300; CHECK-NEXT:    cmovbq %rax, %rbp
301; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
302; CHECK-NEXT:    movq $-1, %rax
303; CHECK-NEXT:    cmovaq %rax, %rbp
304; CHECK-NEXT:    cmovaq %rax, %r14
305; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
306; CHECK-NEXT:    callq __fixunssfti@PLT
307; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
308; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
309; CHECK-NEXT:    movl $0, %ecx
310; CHECK-NEXT:    cmovbq %rcx, %rdx
311; CHECK-NEXT:    cmovbq %rcx, %rax
312; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
313; CHECK-NEXT:    movq $-1, %rcx
314; CHECK-NEXT:    cmovaq %rcx, %rax
315; CHECK-NEXT:    cmovaq %rcx, %rdx
316; CHECK-NEXT:    movq %rdx, 8(%rbx)
317; CHECK-NEXT:    movq %rax, (%rbx)
318; CHECK-NEXT:    movq %r14, 56(%rbx)
319; CHECK-NEXT:    movq %rbp, 48(%rbx)
320; CHECK-NEXT:    movq %r13, 40(%rbx)
321; CHECK-NEXT:    movq %r12, 32(%rbx)
322; CHECK-NEXT:    movq %r15, 24(%rbx)
323; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
324; CHECK-NEXT:    movq %rax, 16(%rbx)
325; CHECK-NEXT:    movq %rbx, %rax
326; CHECK-NEXT:    addq $56, %rsp
327; CHECK-NEXT:    popq %rbx
328; CHECK-NEXT:    popq %r12
329; CHECK-NEXT:    popq %r13
330; CHECK-NEXT:    popq %r14
331; CHECK-NEXT:    popq %r15
332; CHECK-NEXT:    popq %rbp
333; CHECK-NEXT:    retq
334  %x = call <4 x i128> @llvm.fptoui.sat.v4i128.v4f32(<4 x float> %f)
335  ret <4 x i128> %x
336}
337
338;
339; 64-bit float to unsigned integer
340;
341
; Declarations of the llvm.fptoui.sat overloads that take <2 x double>, one per
; result element type used by the functions in this section
; (i1, i8, i16, i32, i64, i128).
342declare <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double>)
343declare <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double>)
344declare <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double>)
345declare <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double>)
346declare <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double>)
347declare <2 x i128> @llvm.fptoui.sat.v2i128.v2f64(<2 x double>)
348
; <2 x double> -> <2 x i1> through llvm.fptoui.sat; the result is returned as-is.
; Expected assembly, per lane: maxsd against a zeroed register (%xmm2), minsd
; against the constant 1.0 (%xmm3), cvttsd2si into %rax, movq back into an
; XMM register. The two lanes are joined with punpcklqdq.
349define <2 x i1> @test_unsigned_v2i1_v2f64(<2 x double> %f) nounwind {
350; CHECK-LABEL: test_unsigned_v2i1_v2f64:
351; CHECK:       # %bb.0:
352; CHECK-NEXT:    xorpd %xmm2, %xmm2
353; CHECK-NEXT:    movapd %xmm0, %xmm1
354; CHECK-NEXT:    maxsd %xmm2, %xmm1
355; CHECK-NEXT:    movsd {{.*#+}} xmm3 = [1.0E+0,0.0E+0]
356; CHECK-NEXT:    minsd %xmm3, %xmm1
357; CHECK-NEXT:    cvttsd2si %xmm1, %rax
358; CHECK-NEXT:    movq %rax, %xmm1
359; CHECK-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
360; CHECK-NEXT:    maxsd %xmm2, %xmm0
361; CHECK-NEXT:    minsd %xmm3, %xmm0
362; CHECK-NEXT:    cvttsd2si %xmm0, %rax
363; CHECK-NEXT:    movq %rax, %xmm0
364; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
365; CHECK-NEXT:    movdqa %xmm1, %xmm0
366; CHECK-NEXT:    retq
367  %x = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> %f)
368  ret <2 x i1> %x
369}
370
; <2 x double> -> <2 x i8> through llvm.fptoui.sat; the result is returned as-is.
; Expected assembly, per lane: maxsd against zero, minsd against 255.0
; (2.55E+2), then cvttsd2si. Lane 0 is masked with movzbl, lane 1 is shifted
; left by 8 and OR-ed in, and the packed value is moved into %xmm0 with movd.
371define <2 x i8> @test_unsigned_v2i8_v2f64(<2 x double> %f) nounwind {
372; CHECK-LABEL: test_unsigned_v2i8_v2f64:
373; CHECK:       # %bb.0:
374; CHECK-NEXT:    xorpd %xmm1, %xmm1
375; CHECK-NEXT:    xorpd %xmm2, %xmm2
376; CHECK-NEXT:    maxsd %xmm0, %xmm2
377; CHECK-NEXT:    movsd {{.*#+}} xmm3 = [2.55E+2,0.0E+0]
378; CHECK-NEXT:    movapd %xmm3, %xmm4
379; CHECK-NEXT:    minsd %xmm2, %xmm4
380; CHECK-NEXT:    cvttsd2si %xmm4, %eax
381; CHECK-NEXT:    movzbl %al, %eax
382; CHECK-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
383; CHECK-NEXT:    maxsd %xmm0, %xmm1
384; CHECK-NEXT:    minsd %xmm1, %xmm3
385; CHECK-NEXT:    cvttsd2si %xmm3, %ecx
386; CHECK-NEXT:    shll $8, %ecx
387; CHECK-NEXT:    orl %eax, %ecx
388; CHECK-NEXT:    movd %ecx, %xmm0
389; CHECK-NEXT:    retq
390  %x = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> %f)
391  ret <2 x i8> %x
392}
393
; <2 x double> -> <2 x i16> through llvm.fptoui.sat; the result is returned as-is.
; Expected assembly: both lanes go through maxsd against zero and minsd
; against 65535.0 (6.5535E+4), then cvttsd2si. Lane 0 enters %xmm0 via movd
; and lane 1 is inserted with pinsrw $1.
394define <2 x i16> @test_unsigned_v2i16_v2f64(<2 x double> %f) nounwind {
395; CHECK-LABEL: test_unsigned_v2i16_v2f64:
396; CHECK:       # %bb.0:
397; CHECK-NEXT:    xorpd %xmm1, %xmm1
398; CHECK-NEXT:    maxsd %xmm0, %xmm1
399; CHECK-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
400; CHECK-NEXT:    xorpd %xmm2, %xmm2
401; CHECK-NEXT:    maxsd %xmm0, %xmm2
402; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [6.5535E+4,0.0E+0]
403; CHECK-NEXT:    movapd %xmm0, %xmm3
404; CHECK-NEXT:    minsd %xmm2, %xmm3
405; CHECK-NEXT:    cvttsd2si %xmm3, %eax
406; CHECK-NEXT:    minsd %xmm1, %xmm0
407; CHECK-NEXT:    cvttsd2si %xmm0, %ecx
408; CHECK-NEXT:    movd %ecx, %xmm0
409; CHECK-NEXT:    pinsrw $1, %eax, %xmm0
410; CHECK-NEXT:    retq
411  %x = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f64(<2 x double> %f)
412  ret <2 x i16> %x
413}
414
; <2 x double> -> <2 x i32> through llvm.fptoui.sat; the result is returned as-is.
; Expected assembly, per lane: maxsd against zero, minsd against
; 4.294967295E+9, cvttsd2si into the 64-bit register %rax, then movd of %eax.
; The two lanes are joined with punpckldq.
415define <2 x i32> @test_unsigned_v2i32_v2f64(<2 x double> %f) nounwind {
416; CHECK-LABEL: test_unsigned_v2i32_v2f64:
417; CHECK:       # %bb.0:
418; CHECK-NEXT:    xorpd %xmm2, %xmm2
419; CHECK-NEXT:    xorpd %xmm1, %xmm1
420; CHECK-NEXT:    maxsd %xmm0, %xmm1
421; CHECK-NEXT:    movsd {{.*#+}} xmm3 = [4.294967295E+9,0.0E+0]
422; CHECK-NEXT:    movapd %xmm3, %xmm4
423; CHECK-NEXT:    minsd %xmm1, %xmm4
424; CHECK-NEXT:    cvttsd2si %xmm4, %rax
425; CHECK-NEXT:    movd %eax, %xmm1
426; CHECK-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
427; CHECK-NEXT:    maxsd %xmm0, %xmm2
428; CHECK-NEXT:    minsd %xmm2, %xmm3
429; CHECK-NEXT:    cvttsd2si %xmm3, %rax
430; CHECK-NEXT:    movd %eax, %xmm0
431; CHECK-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
432; CHECK-NEXT:    movdqa %xmm1, %xmm0
433; CHECK-NEXT:    retq
434  %x = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> %f)
435  ret <2 x i32> %x
436}
437
; <2 x double> -> <2 x i64> through llvm.fptoui.sat; the result is returned as-is.
; Expected assembly, per lane: two cvttsd2si conversions (the lane itself and
; the lane minus 9.2233720368547758E+18) merged with sarq $63 / andq / orq.
; Then ucomisd against zero + cmovbq picks 0, and ucomisd against
; 1.844674407370955E+19 + cmovaq picks -1. The lanes are joined with punpcklqdq.
438define <2 x i64> @test_unsigned_v2i64_v2f64(<2 x double> %f) nounwind {
439; CHECK-LABEL: test_unsigned_v2i64_v2f64:
440; CHECK:       # %bb.0:
441; CHECK-NEXT:    movsd {{.*#+}} xmm2 = [9.2233720368547758E+18,0.0E+0]
442; CHECK-NEXT:    movapd %xmm0, %xmm1
443; CHECK-NEXT:    subsd %xmm2, %xmm1
444; CHECK-NEXT:    cvttsd2si %xmm1, %rax
445; CHECK-NEXT:    cvttsd2si %xmm0, %rcx
446; CHECK-NEXT:    movq %rcx, %rdx
447; CHECK-NEXT:    sarq $63, %rdx
448; CHECK-NEXT:    andq %rax, %rdx
449; CHECK-NEXT:    orq %rcx, %rdx
450; CHECK-NEXT:    xorl %eax, %eax
451; CHECK-NEXT:    xorpd %xmm3, %xmm3
452; CHECK-NEXT:    ucomisd %xmm3, %xmm0
453; CHECK-NEXT:    cmovbq %rax, %rdx
454; CHECK-NEXT:    movsd {{.*#+}} xmm4 = [1.844674407370955E+19,0.0E+0]
455; CHECK-NEXT:    ucomisd %xmm4, %xmm0
456; CHECK-NEXT:    movq $-1, %rcx
457; CHECK-NEXT:    cmovaq %rcx, %rdx
458; CHECK-NEXT:    movq %rdx, %xmm1
459; CHECK-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
460; CHECK-NEXT:    movapd %xmm0, %xmm5
461; CHECK-NEXT:    subsd %xmm2, %xmm5
462; CHECK-NEXT:    cvttsd2si %xmm5, %rdx
463; CHECK-NEXT:    cvttsd2si %xmm0, %rsi
464; CHECK-NEXT:    movq %rsi, %rdi
465; CHECK-NEXT:    sarq $63, %rdi
466; CHECK-NEXT:    andq %rdx, %rdi
467; CHECK-NEXT:    orq %rsi, %rdi
468; CHECK-NEXT:    ucomisd %xmm3, %xmm0
469; CHECK-NEXT:    cmovbq %rax, %rdi
470; CHECK-NEXT:    ucomisd %xmm4, %xmm0
471; CHECK-NEXT:    cmovaq %rcx, %rdi
472; CHECK-NEXT:    movq %rdi, %xmm0
473; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
474; CHECK-NEXT:    movdqa %xmm1, %xmm0
475; CHECK-NEXT:    retq
476  %x = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %f)
477  ret <2 x i64> %x
478}
479
; <2 x double> -> <2 x i128> through llvm.fptoui.sat; the result is returned as-is.
; Expected assembly: each lane is converted by a call to __fixunsdfti. The
; two 64-bit result halves are then overridden with 0 (cmovbq from %r12) or
; -1 (cmovaq from %r13) after ucomisd compares against zero / constant-pool
; values. The four halves are stored at offsets 0..24 from %rbx, which holds
; the incoming %rdi, and that pointer is returned in %rax.
480define <2 x i128> @test_unsigned_v2i128_v2f64(<2 x double> %f) nounwind {
481; CHECK-LABEL: test_unsigned_v2i128_v2f64:
482; CHECK:       # %bb.0:
483; CHECK-NEXT:    pushq %r15
484; CHECK-NEXT:    pushq %r14
485; CHECK-NEXT:    pushq %r13
486; CHECK-NEXT:    pushq %r12
487; CHECK-NEXT:    pushq %rbx
488; CHECK-NEXT:    subq $32, %rsp
489; CHECK-NEXT:    movapd %xmm0, (%rsp) # 16-byte Spill
490; CHECK-NEXT:    movq %rdi, %rbx
491; CHECK-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
492; CHECK-NEXT:    movapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
493; CHECK-NEXT:    callq __fixunsdfti@PLT
494; CHECK-NEXT:    movq %rax, %r14
495; CHECK-NEXT:    movq %rdx, %r15
496; CHECK-NEXT:    xorl %r12d, %r12d
497; CHECK-NEXT:    xorpd %xmm0, %xmm0
498; CHECK-NEXT:    movapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
499; CHECK-NEXT:    ucomisd %xmm0, %xmm1
500; CHECK-NEXT:    cmovbq %r12, %r15
501; CHECK-NEXT:    cmovbq %r12, %r14
502; CHECK-NEXT:    ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
503; CHECK-NEXT:    movq $-1, %r13
504; CHECK-NEXT:    cmovaq %r13, %r14
505; CHECK-NEXT:    cmovaq %r13, %r15
506; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
507; CHECK-NEXT:    callq __fixunsdfti@PLT
508; CHECK-NEXT:    movapd (%rsp), %xmm0 # 16-byte Reload
509; CHECK-NEXT:    ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
510; CHECK-NEXT:    cmovbq %r12, %rdx
511; CHECK-NEXT:    cmovbq %r12, %rax
512; CHECK-NEXT:    ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
513; CHECK-NEXT:    cmovaq %r13, %rax
514; CHECK-NEXT:    cmovaq %r13, %rdx
515; CHECK-NEXT:    movq %rdx, 8(%rbx)
516; CHECK-NEXT:    movq %rax, (%rbx)
517; CHECK-NEXT:    movq %r15, 24(%rbx)
518; CHECK-NEXT:    movq %r14, 16(%rbx)
519; CHECK-NEXT:    movq %rbx, %rax
520; CHECK-NEXT:    addq $32, %rsp
521; CHECK-NEXT:    popq %rbx
522; CHECK-NEXT:    popq %r12
523; CHECK-NEXT:    popq %r13
524; CHECK-NEXT:    popq %r14
525; CHECK-NEXT:    popq %r15
526; CHECK-NEXT:    retq
527  %x = call <2 x i128> @llvm.fptoui.sat.v2i128.v2f64(<2 x double> %f)
528  ret <2 x i128> %x
529}
530
531;
532; 16-bit float to unsigned integer
533;
534
; Declarations of the llvm.fptoui.sat overloads that take <8 x half>, one per
; result element type (i1, i8, i16, i32, i64, i128).
535declare <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half>)
536declare <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half>)
537declare <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half>)
538declare <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half>)
539declare <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half>)
540declare <8 x i128> @llvm.fptoui.sat.v8i128.v8f16(<8 x half>)
541
; <8 x half> -> <8 x i1> through llvm.fptoui.sat; the result is returned as-is.
; Expected assembly: the input is spilled once, then each of the eight lanes
; is moved to the low element (psrldq/shufps/movhlps/psrlq/psrld) and passed
; to __extendhfsf2 (presumably the half-to-float extension helper). The value
; returned in %xmm0 goes through cvttss2si, then ucomiss + cmovbl picks 0
; (%ebx) and ucomiss + cmoval picks 1 (%ebp). Partial results are spilled and
; merged with punpcklwd/punpckldq/punpcklqdq.
542define <8 x i1> @test_unsigned_v8i1_v8f16(<8 x half> %f) nounwind {
543; CHECK-LABEL: test_unsigned_v8i1_v8f16:
544; CHECK:       # %bb.0:
545; CHECK-NEXT:    pushq %rbp
546; CHECK-NEXT:    pushq %rbx
547; CHECK-NEXT:    subq $72, %rsp
548; CHECK-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
549; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
550; CHECK-NEXT:    callq __extendhfsf2@PLT
551; CHECK-NEXT:    cvttss2si %xmm0, %eax
552; CHECK-NEXT:    xorl %ebx, %ebx
553; CHECK-NEXT:    xorps %xmm1, %xmm1
554; CHECK-NEXT:    ucomiss %xmm1, %xmm0
555; CHECK-NEXT:    cmovbl %ebx, %eax
556; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
557; CHECK-NEXT:    movl $1, %ebp
558; CHECK-NEXT:    cmoval %ebp, %eax
559; CHECK-NEXT:    movd %eax, %xmm0
560; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
561; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
562; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
563; CHECK-NEXT:    callq __extendhfsf2@PLT
564; CHECK-NEXT:    cvttss2si %xmm0, %eax
565; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
566; CHECK-NEXT:    cmovbl %ebx, %eax
567; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
568; CHECK-NEXT:    cmoval %ebp, %eax
569; CHECK-NEXT:    movd %eax, %xmm0
570; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
571; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
572; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
573; CHECK-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
574; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
575; CHECK-NEXT:    callq __extendhfsf2@PLT
576; CHECK-NEXT:    cvttss2si %xmm0, %eax
577; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
578; CHECK-NEXT:    cmovbl %ebx, %eax
579; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
580; CHECK-NEXT:    cmoval %ebp, %eax
581; CHECK-NEXT:    movd %eax, %xmm0
582; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
583; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
584; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
585; CHECK-NEXT:    callq __extendhfsf2@PLT
586; CHECK-NEXT:    cvttss2si %xmm0, %eax
587; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
588; CHECK-NEXT:    cmovbl %ebx, %eax
589; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
590; CHECK-NEXT:    cmoval %ebp, %eax
591; CHECK-NEXT:    movd %eax, %xmm0
592; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
593; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
594; CHECK-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
595; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
596; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
597; CHECK-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
598; CHECK-NEXT:    psrlq $48, %xmm0
599; CHECK-NEXT:    callq __extendhfsf2@PLT
600; CHECK-NEXT:    cvttss2si %xmm0, %eax
601; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
602; CHECK-NEXT:    cmovbl %ebx, %eax
603; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
604; CHECK-NEXT:    cmoval %ebp, %eax
605; CHECK-NEXT:    movd %eax, %xmm0
606; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
607; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
608; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
609; CHECK-NEXT:    callq __extendhfsf2@PLT
610; CHECK-NEXT:    cvttss2si %xmm0, %eax
611; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
612; CHECK-NEXT:    cmovbl %ebx, %eax
613; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
614; CHECK-NEXT:    cmoval %ebp, %eax
615; CHECK-NEXT:    movd %eax, %xmm0
616; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
617; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
618; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
619; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
620; CHECK-NEXT:    callq __extendhfsf2@PLT
621; CHECK-NEXT:    cvttss2si %xmm0, %eax
622; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
623; CHECK-NEXT:    cmovbl %ebx, %eax
624; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
625; CHECK-NEXT:    cmoval %ebp, %eax
626; CHECK-NEXT:    movd %eax, %xmm0
627; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
628; CHECK-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
629; CHECK-NEXT:    psrld $16, %xmm0
630; CHECK-NEXT:    callq __extendhfsf2@PLT
631; CHECK-NEXT:    cvttss2si %xmm0, %eax
632; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
633; CHECK-NEXT:    cmovbl %ebx, %eax
634; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
635; CHECK-NEXT:    cmoval %ebp, %eax
636; CHECK-NEXT:    movd %eax, %xmm1
637; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
638; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
639; CHECK-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
640; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
641; CHECK-NEXT:    punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
642; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
643; CHECK-NEXT:    addq $72, %rsp
644; CHECK-NEXT:    popq %rbx
645; CHECK-NEXT:    popq %rbp
646; CHECK-NEXT:    retq
647  %x = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> %f)
648  ret <8 x i1> %x
649}
650
; <8 x half> -> <8 x i8> through llvm.fptoui.sat; the result is returned as-is.
; Expected assembly: the input is spilled once, then each of the eight lanes
; is moved to the low element and passed to __extendhfsf2 (presumably the
; half-to-float extension helper). The value returned in %xmm0 goes through
; cvttss2si, then ucomiss + cmovbl picks 0 (%ebx) and ucomiss + cmoval picks
; 255 (%ebp). Bytes are combined in GPRs with movzbl/shll/orl and placed in
; the result vector with movd plus pinsrw $2 and pinsrw $3.
651define <8 x i8> @test_unsigned_v8i8_v8f16(<8 x half> %f) nounwind {
652; CHECK-LABEL: test_unsigned_v8i8_v8f16:
653; CHECK:       # %bb.0:
654; CHECK-NEXT:    pushq %rbp
655; CHECK-NEXT:    pushq %r15
656; CHECK-NEXT:    pushq %r14
657; CHECK-NEXT:    pushq %r12
658; CHECK-NEXT:    pushq %rbx
659; CHECK-NEXT:    subq $32, %rsp
660; CHECK-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
661; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
662; CHECK-NEXT:    callq __extendhfsf2@PLT
663; CHECK-NEXT:    cvttss2si %xmm0, %r15d
664; CHECK-NEXT:    xorl %ebx, %ebx
665; CHECK-NEXT:    xorps %xmm1, %xmm1
666; CHECK-NEXT:    ucomiss %xmm1, %xmm0
667; CHECK-NEXT:    cmovbl %ebx, %r15d
668; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
669; CHECK-NEXT:    movl $255, %ebp
670; CHECK-NEXT:    cmoval %ebp, %r15d
671; CHECK-NEXT:    shll $8, %r15d
672; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
673; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
674; CHECK-NEXT:    callq __extendhfsf2@PLT
675; CHECK-NEXT:    cvttss2si %xmm0, %eax
676; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
677; CHECK-NEXT:    cmovbl %ebx, %eax
678; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
679; CHECK-NEXT:    cmoval %ebp, %eax
680; CHECK-NEXT:    movzbl %al, %r14d
681; CHECK-NEXT:    orl %r15d, %r14d
682; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
683; CHECK-NEXT:    callq __extendhfsf2@PLT
684; CHECK-NEXT:    cvttss2si %xmm0, %eax
685; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
686; CHECK-NEXT:    cmovbl %ebx, %eax
687; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
688; CHECK-NEXT:    cmoval %ebp, %eax
689; CHECK-NEXT:    movzbl %al, %r15d
690; CHECK-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
691; CHECK-NEXT:    psrld $16, %xmm0
692; CHECK-NEXT:    callq __extendhfsf2@PLT
693; CHECK-NEXT:    cvttss2si %xmm0, %eax
694; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
695; CHECK-NEXT:    cmovbl %ebx, %eax
696; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
697; CHECK-NEXT:    cmoval %ebp, %eax
698; CHECK-NEXT:    movzbl %al, %r12d
699; CHECK-NEXT:    shll $8, %r12d
700; CHECK-NEXT:    orl %r15d, %r12d
701; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
702; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
703; CHECK-NEXT:    callq __extendhfsf2@PLT
704; CHECK-NEXT:    cvttss2si %xmm0, %eax
705; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
706; CHECK-NEXT:    cmovbl %ebx, %eax
707; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
708; CHECK-NEXT:    cmoval %ebp, %eax
709; CHECK-NEXT:    movzbl %al, %r15d
710; CHECK-NEXT:    shll $16, %r15d
711; CHECK-NEXT:    orl %r12d, %r15d
712; CHECK-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
713; CHECK-NEXT:    psrlq $48, %xmm0
714; CHECK-NEXT:    callq __extendhfsf2@PLT
715; CHECK-NEXT:    cvttss2si %xmm0, %eax
716; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
717; CHECK-NEXT:    cmovbl %ebx, %eax
718; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
719; CHECK-NEXT:    cmoval %ebp, %eax
720; CHECK-NEXT:    shll $24, %eax
721; CHECK-NEXT:    orl %r15d, %eax
722; CHECK-NEXT:    movd %eax, %xmm0
723; CHECK-NEXT:    pinsrw $2, %r14d, %xmm0
724; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
725; CHECK-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
726; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
727; CHECK-NEXT:    callq __extendhfsf2@PLT
728; CHECK-NEXT:    cvttss2si %xmm0, %r14d
729; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
730; CHECK-NEXT:    cmovbl %ebx, %r14d
731; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
732; CHECK-NEXT:    cmoval %ebp, %r14d
733; CHECK-NEXT:    shll $8, %r14d
734; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
735; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
736; CHECK-NEXT:    callq __extendhfsf2@PLT
737; CHECK-NEXT:    cvttss2si %xmm0, %eax
738; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
739; CHECK-NEXT:    cmovbl %ebx, %eax
740; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
741; CHECK-NEXT:    cmoval %ebp, %eax
742; CHECK-NEXT:    movzbl %al, %eax
743; CHECK-NEXT:    orl %r14d, %eax
744; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
745; CHECK-NEXT:    pinsrw $3, %eax, %xmm0
746; CHECK-NEXT:    addq $32, %rsp
747; CHECK-NEXT:    popq %rbx
748; CHECK-NEXT:    popq %r12
749; CHECK-NEXT:    popq %r14
750; CHECK-NEXT:    popq %r15
751; CHECK-NEXT:    popq %rbp
752; CHECK-NEXT:    retq
753  %x = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> %f)
754  ret <8 x i8> %x
755}
756
; Saturating unsigned conversion of <8 x half> to <8 x i16> via
; @llvm.fptoui.sat.v8i16.v8f16.
; The expected code below handles the lanes one at a time: each half is
; extracted from the spilled input, widened with a call to __extendhfsf2,
; truncated with cvttss2si, and then clamped by two conditional moves, one
; selecting the zero held in %ebx and one selecting the 65535 held in %ebp.
; The eight scalars are reassembled with punpcklwd/punpckldq/punpcklqdq.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
757define <8 x i16> @test_unsigned_v8i16_v8f16(<8 x half> %f) nounwind {
758; CHECK-LABEL: test_unsigned_v8i16_v8f16:
759; CHECK:       # %bb.0:
760; CHECK-NEXT:    pushq %rbp
761; CHECK-NEXT:    pushq %rbx
762; CHECK-NEXT:    subq $72, %rsp
763; CHECK-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
764; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
765; CHECK-NEXT:    callq __extendhfsf2@PLT
766; CHECK-NEXT:    cvttss2si %xmm0, %eax
767; CHECK-NEXT:    xorl %ebx, %ebx
768; CHECK-NEXT:    xorps %xmm1, %xmm1
769; CHECK-NEXT:    ucomiss %xmm1, %xmm0
770; CHECK-NEXT:    cmovbl %ebx, %eax
771; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
772; CHECK-NEXT:    movl $65535, %ebp # imm = 0xFFFF
773; CHECK-NEXT:    cmoval %ebp, %eax
774; CHECK-NEXT:    movd %eax, %xmm0
775; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
776; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
777; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
778; CHECK-NEXT:    callq __extendhfsf2@PLT
779; CHECK-NEXT:    cvttss2si %xmm0, %eax
780; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
781; CHECK-NEXT:    cmovbl %ebx, %eax
782; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
783; CHECK-NEXT:    cmoval %ebp, %eax
784; CHECK-NEXT:    movd %eax, %xmm0
785; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
786; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
787; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
788; CHECK-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
789; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
790; CHECK-NEXT:    callq __extendhfsf2@PLT
791; CHECK-NEXT:    cvttss2si %xmm0, %eax
792; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
793; CHECK-NEXT:    cmovbl %ebx, %eax
794; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
795; CHECK-NEXT:    cmoval %ebp, %eax
796; CHECK-NEXT:    movd %eax, %xmm0
797; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
798; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
799; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
800; CHECK-NEXT:    callq __extendhfsf2@PLT
801; CHECK-NEXT:    cvttss2si %xmm0, %eax
802; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
803; CHECK-NEXT:    cmovbl %ebx, %eax
804; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
805; CHECK-NEXT:    cmoval %ebp, %eax
806; CHECK-NEXT:    movd %eax, %xmm0
807; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
808; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
809; CHECK-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
810; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
811; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
812; CHECK-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
813; CHECK-NEXT:    psrlq $48, %xmm0
814; CHECK-NEXT:    callq __extendhfsf2@PLT
815; CHECK-NEXT:    cvttss2si %xmm0, %eax
816; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
817; CHECK-NEXT:    cmovbl %ebx, %eax
818; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
819; CHECK-NEXT:    cmoval %ebp, %eax
820; CHECK-NEXT:    movd %eax, %xmm0
821; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
822; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
823; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
824; CHECK-NEXT:    callq __extendhfsf2@PLT
825; CHECK-NEXT:    cvttss2si %xmm0, %eax
826; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
827; CHECK-NEXT:    cmovbl %ebx, %eax
828; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
829; CHECK-NEXT:    cmoval %ebp, %eax
830; CHECK-NEXT:    movd %eax, %xmm0
831; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
832; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
833; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
834; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
835; CHECK-NEXT:    callq __extendhfsf2@PLT
836; CHECK-NEXT:    cvttss2si %xmm0, %eax
837; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
838; CHECK-NEXT:    cmovbl %ebx, %eax
839; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
840; CHECK-NEXT:    cmoval %ebp, %eax
841; CHECK-NEXT:    movd %eax, %xmm0
842; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
843; CHECK-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
844; CHECK-NEXT:    psrld $16, %xmm0
845; CHECK-NEXT:    callq __extendhfsf2@PLT
846; CHECK-NEXT:    cvttss2si %xmm0, %eax
847; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
848; CHECK-NEXT:    cmovbl %ebx, %eax
849; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
850; CHECK-NEXT:    cmoval %ebp, %eax
851; CHECK-NEXT:    movd %eax, %xmm1
852; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
853; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
854; CHECK-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
855; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
856; CHECK-NEXT:    punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
857; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
858; CHECK-NEXT:    addq $72, %rsp
859; CHECK-NEXT:    popq %rbx
860; CHECK-NEXT:    popq %rbp
861; CHECK-NEXT:    retq
862  %x = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> %f)
863  ret <8 x i16> %x
864}
865
; Saturating unsigned conversion of <8 x half> to <8 x i32> via
; @llvm.fptoui.sat.v8i32.v8f16.
; In the expected code each lane is widened with __extendhfsf2 and truncated
; with cvttss2si into the 64-bit register %rax; the 32-bit conditional moves
; that follow then select the zero held in %ebx or the -1 held in %ebp.
; The two result halves are built with punpckldq/punpcklqdq and come back in
; %xmm0 (reloaded from the stack just before the epilogue) and %xmm1.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
866define <8 x i32> @test_unsigned_v8i32_v8f16(<8 x half> %f) nounwind {
867; CHECK-LABEL: test_unsigned_v8i32_v8f16:
868; CHECK:       # %bb.0:
869; CHECK-NEXT:    pushq %rbp
870; CHECK-NEXT:    pushq %rbx
871; CHECK-NEXT:    subq $72, %rsp
872; CHECK-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
873; CHECK-NEXT:    psrlq $48, %xmm0
874; CHECK-NEXT:    callq __extendhfsf2@PLT
875; CHECK-NEXT:    cvttss2si %xmm0, %rax
876; CHECK-NEXT:    xorl %ebx, %ebx
877; CHECK-NEXT:    xorps %xmm1, %xmm1
878; CHECK-NEXT:    ucomiss %xmm1, %xmm0
879; CHECK-NEXT:    cmovbl %ebx, %eax
880; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
881; CHECK-NEXT:    movl $-1, %ebp
882; CHECK-NEXT:    cmoval %ebp, %eax
883; CHECK-NEXT:    movd %eax, %xmm0
884; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
885; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
886; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
887; CHECK-NEXT:    callq __extendhfsf2@PLT
888; CHECK-NEXT:    cvttss2si %xmm0, %rax
889; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
890; CHECK-NEXT:    cmovbl %ebx, %eax
891; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
892; CHECK-NEXT:    cmoval %ebp, %eax
893; CHECK-NEXT:    movd %eax, %xmm0
894; CHECK-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
895; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
896; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
897; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
898; CHECK-NEXT:    callq __extendhfsf2@PLT
899; CHECK-NEXT:    cvttss2si %xmm0, %rax
900; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
901; CHECK-NEXT:    cmovbl %ebx, %eax
902; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
903; CHECK-NEXT:    cmoval %ebp, %eax
904; CHECK-NEXT:    movd %eax, %xmm0
905; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
906; CHECK-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
907; CHECK-NEXT:    psrld $16, %xmm0
908; CHECK-NEXT:    callq __extendhfsf2@PLT
909; CHECK-NEXT:    cvttss2si %xmm0, %rax
910; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
911; CHECK-NEXT:    cmovbl %ebx, %eax
912; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
913; CHECK-NEXT:    cmoval %ebp, %eax
914; CHECK-NEXT:    movd %eax, %xmm0
915; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
916; CHECK-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
917; CHECK-NEXT:    punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
918; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
919; CHECK-NEXT:    movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
920; CHECK-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
921; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
922; CHECK-NEXT:    callq __extendhfsf2@PLT
923; CHECK-NEXT:    cvttss2si %xmm0, %rax
924; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
925; CHECK-NEXT:    cmovbl %ebx, %eax
926; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
927; CHECK-NEXT:    cmoval %ebp, %eax
928; CHECK-NEXT:    movd %eax, %xmm0
929; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
930; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
931; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
932; CHECK-NEXT:    callq __extendhfsf2@PLT
933; CHECK-NEXT:    cvttss2si %xmm0, %rax
934; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
935; CHECK-NEXT:    cmovbl %ebx, %eax
936; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
937; CHECK-NEXT:    cmoval %ebp, %eax
938; CHECK-NEXT:    movd %eax, %xmm0
939; CHECK-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
940; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
941; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
942; CHECK-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
943; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
944; CHECK-NEXT:    callq __extendhfsf2@PLT
945; CHECK-NEXT:    cvttss2si %xmm0, %rax
946; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
947; CHECK-NEXT:    cmovbl %ebx, %eax
948; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
949; CHECK-NEXT:    cmoval %ebp, %eax
950; CHECK-NEXT:    movd %eax, %xmm0
951; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
952; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
953; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
954; CHECK-NEXT:    callq __extendhfsf2@PLT
955; CHECK-NEXT:    cvttss2si %xmm0, %rax
956; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
957; CHECK-NEXT:    cmovbl %ebx, %eax
958; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
959; CHECK-NEXT:    cmoval %ebp, %eax
960; CHECK-NEXT:    movd %eax, %xmm1
961; CHECK-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
962; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
963; CHECK-NEXT:    punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
964; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
965; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
966; CHECK-NEXT:    addq $72, %rsp
967; CHECK-NEXT:    popq %rbx
968; CHECK-NEXT:    popq %rbp
969; CHECK-NEXT:    retq
970  %x = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> %f)
971  ret <8 x i32> %x
972}
973
; Saturating unsigned conversion of <8 x half> to <8 x i64> via
; @llvm.fptoui.sat.v8i64.v8f16.
; In the expected code each lane is widened with __extendhfsf2 and then
; converted twice with cvttss2si: once directly (into %rcx) and once after a
; subss of a constant-pool value (into %rax). The two results are merged with
; the movq/sarq $63/andq/orq sequence into %rdx, which is then clamped by two
; 64-bit conditional moves selecting the zero in %rbx or the -1 in %r14.
; Lanes are paired with punpcklqdq and the four result vectors are returned
; in %xmm0-%xmm3.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
974define <8 x i64> @test_unsigned_v8i64_v8f16(<8 x half> %f) nounwind {
975; CHECK-LABEL: test_unsigned_v8i64_v8f16:
976; CHECK:       # %bb.0:
977; CHECK-NEXT:    pushq %r14
978; CHECK-NEXT:    pushq %rbx
979; CHECK-NEXT:    subq $88, %rsp
980; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
981; CHECK-NEXT:    callq __extendhfsf2@PLT
982; CHECK-NEXT:    movaps %xmm0, %xmm1
983; CHECK-NEXT:    subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
984; CHECK-NEXT:    cvttss2si %xmm1, %rax
985; CHECK-NEXT:    cvttss2si %xmm0, %rcx
986; CHECK-NEXT:    movq %rcx, %rdx
987; CHECK-NEXT:    sarq $63, %rdx
988; CHECK-NEXT:    andq %rax, %rdx
989; CHECK-NEXT:    orq %rcx, %rdx
990; CHECK-NEXT:    xorl %ebx, %ebx
991; CHECK-NEXT:    xorps %xmm1, %xmm1
992; CHECK-NEXT:    ucomiss %xmm1, %xmm0
993; CHECK-NEXT:    cmovbq %rbx, %rdx
994; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
995; CHECK-NEXT:    movq $-1, %r14
996; CHECK-NEXT:    cmovaq %r14, %rdx
997; CHECK-NEXT:    movq %rdx, %xmm0
998; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
999; CHECK-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
1000; CHECK-NEXT:    psrld $16, %xmm0
1001; CHECK-NEXT:    callq __extendhfsf2@PLT
1002; CHECK-NEXT:    movdqa %xmm0, %xmm1
1003; CHECK-NEXT:    subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1004; CHECK-NEXT:    cvttss2si %xmm1, %rax
1005; CHECK-NEXT:    cvttss2si %xmm0, %rcx
1006; CHECK-NEXT:    movq %rcx, %rdx
1007; CHECK-NEXT:    sarq $63, %rdx
1008; CHECK-NEXT:    andq %rax, %rdx
1009; CHECK-NEXT:    orq %rcx, %rdx
1010; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1011; CHECK-NEXT:    cmovbq %rbx, %rdx
1012; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1013; CHECK-NEXT:    cmovaq %r14, %rdx
1014; CHECK-NEXT:    movq %rdx, %xmm0
1015; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
1016; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
1017; CHECK-NEXT:    movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1018; CHECK-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
1019; CHECK-NEXT:    psrlq $48, %xmm0
1020; CHECK-NEXT:    callq __extendhfsf2@PLT
1021; CHECK-NEXT:    movdqa %xmm0, %xmm1
1022; CHECK-NEXT:    subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1023; CHECK-NEXT:    cvttss2si %xmm1, %rax
1024; CHECK-NEXT:    cvttss2si %xmm0, %rcx
1025; CHECK-NEXT:    movq %rcx, %rdx
1026; CHECK-NEXT:    sarq $63, %rdx
1027; CHECK-NEXT:    andq %rax, %rdx
1028; CHECK-NEXT:    orq %rcx, %rdx
1029; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1030; CHECK-NEXT:    cmovbq %rbx, %rdx
1031; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1032; CHECK-NEXT:    cmovaq %r14, %rdx
1033; CHECK-NEXT:    movq %rdx, %xmm0
1034; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1035; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
1036; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1037; CHECK-NEXT:    callq __extendhfsf2@PLT
1038; CHECK-NEXT:    movaps %xmm0, %xmm1
1039; CHECK-NEXT:    subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1040; CHECK-NEXT:    cvttss2si %xmm1, %rax
1041; CHECK-NEXT:    cvttss2si %xmm0, %rcx
1042; CHECK-NEXT:    movq %rcx, %rdx
1043; CHECK-NEXT:    sarq $63, %rdx
1044; CHECK-NEXT:    andq %rax, %rdx
1045; CHECK-NEXT:    orq %rcx, %rdx
1046; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1047; CHECK-NEXT:    cmovbq %rbx, %rdx
1048; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1049; CHECK-NEXT:    cmovaq %r14, %rdx
1050; CHECK-NEXT:    movq %rdx, %xmm0
1051; CHECK-NEXT:    punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1052; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
1053; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1054; CHECK-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
1055; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1056; CHECK-NEXT:    callq __extendhfsf2@PLT
1057; CHECK-NEXT:    movdqa %xmm0, %xmm1
1058; CHECK-NEXT:    subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1059; CHECK-NEXT:    cvttss2si %xmm1, %rax
1060; CHECK-NEXT:    cvttss2si %xmm0, %rcx
1061; CHECK-NEXT:    movq %rcx, %rdx
1062; CHECK-NEXT:    sarq $63, %rdx
1063; CHECK-NEXT:    andq %rax, %rdx
1064; CHECK-NEXT:    orq %rcx, %rdx
1065; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1066; CHECK-NEXT:    cmovbq %rbx, %rdx
1067; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1068; CHECK-NEXT:    cmovaq %r14, %rdx
1069; CHECK-NEXT:    movq %rdx, %xmm0
1070; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1071; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
1072; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
1073; CHECK-NEXT:    callq __extendhfsf2@PLT
1074; CHECK-NEXT:    movaps %xmm0, %xmm1
1075; CHECK-NEXT:    subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1076; CHECK-NEXT:    cvttss2si %xmm1, %rax
1077; CHECK-NEXT:    cvttss2si %xmm0, %rcx
1078; CHECK-NEXT:    movq %rcx, %rdx
1079; CHECK-NEXT:    sarq $63, %rdx
1080; CHECK-NEXT:    andq %rax, %rdx
1081; CHECK-NEXT:    orq %rcx, %rdx
1082; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1083; CHECK-NEXT:    cmovbq %rbx, %rdx
1084; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1085; CHECK-NEXT:    cmovaq %r14, %rdx
1086; CHECK-NEXT:    movq %rdx, %xmm0
1087; CHECK-NEXT:    punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1088; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
1089; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1090; CHECK-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
1091; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1092; CHECK-NEXT:    callq __extendhfsf2@PLT
1093; CHECK-NEXT:    movdqa %xmm0, %xmm1
1094; CHECK-NEXT:    subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1095; CHECK-NEXT:    cvttss2si %xmm1, %rax
1096; CHECK-NEXT:    cvttss2si %xmm0, %rcx
1097; CHECK-NEXT:    movq %rcx, %rdx
1098; CHECK-NEXT:    sarq $63, %rdx
1099; CHECK-NEXT:    andq %rax, %rdx
1100; CHECK-NEXT:    orq %rcx, %rdx
1101; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1102; CHECK-NEXT:    cmovbq %rbx, %rdx
1103; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1104; CHECK-NEXT:    cmovaq %r14, %rdx
1105; CHECK-NEXT:    movq %rdx, %xmm0
1106; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1107; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
1108; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1109; CHECK-NEXT:    callq __extendhfsf2@PLT
1110; CHECK-NEXT:    movaps %xmm0, %xmm1
1111; CHECK-NEXT:    subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1112; CHECK-NEXT:    cvttss2si %xmm1, %rax
1113; CHECK-NEXT:    cvttss2si %xmm0, %rcx
1114; CHECK-NEXT:    movq %rcx, %rdx
1115; CHECK-NEXT:    sarq $63, %rdx
1116; CHECK-NEXT:    andq %rax, %rdx
1117; CHECK-NEXT:    orq %rcx, %rdx
1118; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1119; CHECK-NEXT:    cmovbq %rbx, %rdx
1120; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1121; CHECK-NEXT:    cmovaq %r14, %rdx
1122; CHECK-NEXT:    movq %rdx, %xmm3
1123; CHECK-NEXT:    punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Folded Reload
1124; CHECK-NEXT:    # xmm3 = xmm3[0],mem[0]
1125; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1126; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
1127; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
1128; CHECK-NEXT:    addq $88, %rsp
1129; CHECK-NEXT:    popq %rbx
1130; CHECK-NEXT:    popq %r14
1131; CHECK-NEXT:    retq
1132  %x = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> %f)
1133  ret <8 x i64> %x
1134}
1135
; Saturating unsigned conversion of <8 x half> to <8 x i128> via
; @llvm.fptoui.sat.v8i128.v8f16.
; In the expected code each lane is widened with __extendhfsf2, the float is
; spilled, and __fixunssfti is called; the %rax/%rdx pair it leaves behind is
; clamped by 64-bit conditional moves that select either zero or -1 for both
; halves, keyed on two ucomiss compares of the reloaded float.
; The result is written to memory: %rdi is copied to %rbx on entry, the
; sixteen 64-bit pieces are stored at offsets 0..120 from %rbx, and %rbx is
; moved to %rax before returning.
; Note that %r12/%r13 hold the 0/-1 clamp values for the first six lanes but
; are reused for the seventh lane's result, so the last two lanes materialize
; the constants with movl $0 / movq $-1 instead.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
1136define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
1137; CHECK-LABEL: test_unsigned_v8i128_v8f16:
1138; CHECK:       # %bb.0:
1139; CHECK-NEXT:    pushq %rbp
1140; CHECK-NEXT:    pushq %r15
1141; CHECK-NEXT:    pushq %r14
1142; CHECK-NEXT:    pushq %r13
1143; CHECK-NEXT:    pushq %r12
1144; CHECK-NEXT:    pushq %rbx
1145; CHECK-NEXT:    subq $104, %rsp
1146; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1147; CHECK-NEXT:    movq %rdi, %rbx
1148; CHECK-NEXT:    psrld $16, %xmm0
1149; CHECK-NEXT:    callq __extendhfsf2@PLT
1150; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1151; CHECK-NEXT:    callq __fixunssfti@PLT
1152; CHECK-NEXT:    xorl %r12d, %r12d
1153; CHECK-NEXT:    pxor %xmm0, %xmm0
1154; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1155; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
1156; CHECK-NEXT:    ucomiss %xmm0, %xmm1
1157; CHECK-NEXT:    cmovbq %r12, %rdx
1158; CHECK-NEXT:    cmovbq %r12, %rax
1159; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1160; CHECK-NEXT:    movq $-1, %r13
1161; CHECK-NEXT:    cmovaq %r13, %rax
1162; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1163; CHECK-NEXT:    cmovaq %r13, %rdx
1164; CHECK-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1165; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1166; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1167; CHECK-NEXT:    callq __extendhfsf2@PLT
1168; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1169; CHECK-NEXT:    callq __fixunssfti@PLT
1170; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1171; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
1172; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1173; CHECK-NEXT:    cmovbq %r12, %rdx
1174; CHECK-NEXT:    cmovbq %r12, %rax
1175; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1176; CHECK-NEXT:    cmovaq %r13, %rax
1177; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1178; CHECK-NEXT:    cmovaq %r13, %rdx
1179; CHECK-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1180; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1181; CHECK-NEXT:    psrlq $48, %xmm0
1182; CHECK-NEXT:    callq __extendhfsf2@PLT
1183; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1184; CHECK-NEXT:    callq __fixunssfti@PLT
1185; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1186; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
1187; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1188; CHECK-NEXT:    cmovbq %r12, %rdx
1189; CHECK-NEXT:    cmovbq %r12, %rax
1190; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1191; CHECK-NEXT:    cmovaq %r13, %rax
1192; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1193; CHECK-NEXT:    cmovaq %r13, %rdx
1194; CHECK-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1195; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1196; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
1197; CHECK-NEXT:    callq __extendhfsf2@PLT
1198; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1199; CHECK-NEXT:    callq __fixunssfti@PLT
1200; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1201; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
1202; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1203; CHECK-NEXT:    cmovbq %r12, %rdx
1204; CHECK-NEXT:    cmovbq %r12, %rax
1205; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1206; CHECK-NEXT:    cmovaq %r13, %rax
1207; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1208; CHECK-NEXT:    cmovaq %r13, %rdx
1209; CHECK-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1210; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1211; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1212; CHECK-NEXT:    callq __extendhfsf2@PLT
1213; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1214; CHECK-NEXT:    callq __fixunssfti@PLT
1215; CHECK-NEXT:    movq %rdx, %rbp
1216; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1217; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
1218; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1219; CHECK-NEXT:    cmovbq %r12, %rbp
1220; CHECK-NEXT:    cmovbq %r12, %rax
1221; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1222; CHECK-NEXT:    cmovaq %r13, %rax
1223; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1224; CHECK-NEXT:    cmovaq %r13, %rbp
1225; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1226; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1227; CHECK-NEXT:    callq __extendhfsf2@PLT
1228; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1229; CHECK-NEXT:    callq __fixunssfti@PLT
1230; CHECK-NEXT:    movq %rax, %r14
1231; CHECK-NEXT:    movq %rdx, %r15
1232; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1233; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
1234; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1235; CHECK-NEXT:    cmovbq %r12, %r15
1236; CHECK-NEXT:    cmovbq %r12, %r14
1237; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1238; CHECK-NEXT:    cmovaq %r13, %r14
1239; CHECK-NEXT:    cmovaq %r13, %r15
1240; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1241; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1242; CHECK-NEXT:    callq __extendhfsf2@PLT
1243; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1244; CHECK-NEXT:    callq __fixunssfti@PLT
1245; CHECK-NEXT:    movq %rax, %r12
1246; CHECK-NEXT:    movq %rdx, %r13
1247; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1248; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
1249; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1250; CHECK-NEXT:    movl $0, %eax
1251; CHECK-NEXT:    cmovbq %rax, %r13
1252; CHECK-NEXT:    cmovbq %rax, %r12
1253; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1254; CHECK-NEXT:    movq $-1, %rax
1255; CHECK-NEXT:    cmovaq %rax, %r12
1256; CHECK-NEXT:    cmovaq %rax, %r13
1257; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1258; CHECK-NEXT:    callq __extendhfsf2@PLT
1259; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1260; CHECK-NEXT:    callq __fixunssfti@PLT
1261; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
1262; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
1263; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1264; CHECK-NEXT:    movl $0, %ecx
1265; CHECK-NEXT:    cmovbq %rcx, %rdx
1266; CHECK-NEXT:    cmovbq %rcx, %rax
1267; CHECK-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1268; CHECK-NEXT:    movq $-1, %rcx
1269; CHECK-NEXT:    cmovaq %rcx, %rax
1270; CHECK-NEXT:    cmovaq %rcx, %rdx
1271; CHECK-NEXT:    movq %rdx, 8(%rbx)
1272; CHECK-NEXT:    movq %rax, (%rbx)
1273; CHECK-NEXT:    movq %r13, 120(%rbx)
1274; CHECK-NEXT:    movq %r12, 112(%rbx)
1275; CHECK-NEXT:    movq %r15, 104(%rbx)
1276; CHECK-NEXT:    movq %r14, 96(%rbx)
1277; CHECK-NEXT:    movq %rbp, 88(%rbx)
1278; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
1279; CHECK-NEXT:    movq %rax, 80(%rbx)
1280; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
1281; CHECK-NEXT:    movq %rax, 72(%rbx)
1282; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
1283; CHECK-NEXT:    movq %rax, 64(%rbx)
1284; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
1285; CHECK-NEXT:    movq %rax, 56(%rbx)
1286; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
1287; CHECK-NEXT:    movq %rax, 48(%rbx)
1288; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
1289; CHECK-NEXT:    movq %rax, 40(%rbx)
1290; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
1291; CHECK-NEXT:    movq %rax, 32(%rbx)
1292; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
1293; CHECK-NEXT:    movq %rax, 24(%rbx)
1294; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
1295; CHECK-NEXT:    movq %rax, 16(%rbx)
1296; CHECK-NEXT:    movq %rbx, %rax
1297; CHECK-NEXT:    addq $104, %rsp
1298; CHECK-NEXT:    popq %rbx
1299; CHECK-NEXT:    popq %r12
1300; CHECK-NEXT:    popq %r13
1301; CHECK-NEXT:    popq %r14
1302; CHECK-NEXT:    popq %r15
1303; CHECK-NEXT:    popq %rbp
1304; CHECK-NEXT:    retq
1305  %x = call <8 x i128> @llvm.fptoui.sat.v8i128.v8f16(<8 x half> %f)
1306  ret <8 x i128> %x
1307}
1308