; xref: /llvm-project/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint-fp16.ll (revision a2a0089ac3a5781ba74d4d319c87c9e8b46d4eda)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2  -O3 | FileCheck %s --check-prefixes=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c  -O3 | FileCheck %s --check-prefixes=AVX,F16C
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f  -O3 | FileCheck %s --check-prefixes=AVX,AVX512
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X64

; Constrained (strict floating-point) half -> integer conversion intrinsics
; called by the test functions in this file: one signed (fptosi) and one
; unsigned (fptoui) variant for each of i1, i8, i16, i32 and i64. The metadata
; operand carries the exception-behaviour string passed at each call site.
declare i1  @llvm.experimental.constrained.fptosi.i1.f16(half, metadata)
declare i8  @llvm.experimental.constrained.fptosi.i8.f16(half, metadata)
declare i16 @llvm.experimental.constrained.fptosi.i16.f16(half, metadata)
declare i32 @llvm.experimental.constrained.fptosi.i32.f16(half, metadata)
declare i64 @llvm.experimental.constrained.fptosi.i64.f16(half, metadata)
declare i1  @llvm.experimental.constrained.fptoui.i1.f16(half, metadata)
declare i8  @llvm.experimental.constrained.fptoui.i8.f16(half, metadata)
declare i16 @llvm.experimental.constrained.fptoui.i16.f16(half, metadata)
declare i32 @llvm.experimental.constrained.fptoui.i32.f16(half, metadata)
declare i64 @llvm.experimental.constrained.fptoui.i64.f16(half, metadata)

; Strict (fpexcept.strict) signed conversion of a half to i1.
; Expected lowering per run, as pinned by the assertions below:
;   - plain SSE2 widens to float through the __extendhfsf2 libcall, then
;     truncates with cvttss2si;
;   - the F16C and AVX512F runs widen with vcvtph2ps, then use vcvttss2si;
;   - both AVX512-FP16 runs convert directly with vcvttsh2si.
; In every case only the low byte (al) of the 32-bit result is returned.
define i1 @fptosi_f16toi1(half %x) #0 {
; SSE2-LABEL: fptosi_f16toi1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    cvttss2si %xmm0, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    popq %rcx
; SSE2-NEXT:    retq
;
; AVX-LABEL: fptosi_f16toi1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrw $0, %xmm0, %eax
; AVX-NEXT:    movzwl %ax, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    vcvttss2si %xmm0, %eax
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
;
; X86-LABEL: fptosi_f16toi1:
; X86:       # %bb.0:
; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: fptosi_f16toi1:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2si %xmm0, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %result = call i1 @llvm.experimental.constrained.fptosi.i1.f16(half %x,
                                               metadata !"fpexcept.strict") #0
  ret i1 %result
}

; Strict (fpexcept.strict) signed conversion of a half to i8.
; Expected lowering per run, as pinned by the assertions below:
;   - plain SSE2 widens to float through the __extendhfsf2 libcall, then
;     truncates with cvttss2si;
;   - the F16C and AVX512F runs widen with vcvtph2ps, then use vcvttss2si;
;   - both AVX512-FP16 runs convert directly with vcvttsh2si.
; The i8 result is the low byte (al) of the 32-bit conversion.
define i8 @fptosi_f16toi8(half %x) #0 {
; SSE2-LABEL: fptosi_f16toi8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    cvttss2si %xmm0, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    popq %rcx
; SSE2-NEXT:    retq
;
; AVX-LABEL: fptosi_f16toi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrw $0, %xmm0, %eax
; AVX-NEXT:    movzwl %ax, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    vcvttss2si %xmm0, %eax
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
;
; X86-LABEL: fptosi_f16toi8:
; X86:       # %bb.0:
; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: fptosi_f16toi8:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2si %xmm0, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %result = call i8 @llvm.experimental.constrained.fptosi.i8.f16(half %x,
                                               metadata !"fpexcept.strict") #0
  ret i8 %result
}

; Strict (fpexcept.strict) signed conversion of a half to i16.
; Expected lowering per run, as pinned by the assertions below:
;   - plain SSE2 widens to float through the __extendhfsf2 libcall, then
;     truncates with cvttss2si;
;   - the F16C and AVX512F runs widen with vcvtph2ps, then use vcvttss2si;
;   - both AVX512-FP16 runs convert directly with vcvttsh2si.
; The i16 result is the low word (ax) of the 32-bit conversion.
define i16 @fptosi_f16toi16(half %x) #0 {
; SSE2-LABEL: fptosi_f16toi16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    cvttss2si %xmm0, %eax
; SSE2-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-NEXT:    popq %rcx
; SSE2-NEXT:    retq
;
; AVX-LABEL: fptosi_f16toi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrw $0, %xmm0, %eax
; AVX-NEXT:    movzwl %ax, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    vcvttss2si %xmm0, %eax
; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX-NEXT:    retq
;
; X86-LABEL: fptosi_f16toi16:
; X86:       # %bb.0:
; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: fptosi_f16toi16:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2si %xmm0, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
  %result = call i16 @llvm.experimental.constrained.fptosi.i16.f16(half %x,
                                               metadata !"fpexcept.strict") #0
  ret i16 %result
}

; Strict (fpexcept.strict) signed conversion of a half to i32.
; Expected lowering per run, as pinned by the assertions below:
;   - plain SSE2 widens to float through the __extendhfsf2 libcall, then
;     truncates with cvttss2si into eax;
;   - the F16C and AVX512F runs widen with vcvtph2ps, then use vcvttss2si;
;   - both AVX512-FP16 runs convert directly with vcvttsh2si.
define i32 @fptosi_f16toi32(half %x) #0 {
; SSE2-LABEL: fptosi_f16toi32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    cvttss2si %xmm0, %eax
; SSE2-NEXT:    popq %rcx
; SSE2-NEXT:    retq
;
; AVX-LABEL: fptosi_f16toi32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrw $0, %xmm0, %eax
; AVX-NEXT:    movzwl %ax, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    vcvttss2si %xmm0, %eax
; AVX-NEXT:    retq
;
; X86-LABEL: fptosi_f16toi32:
; X86:       # %bb.0:
; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: fptosi_f16toi32:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2si %xmm0, %eax
; X64-NEXT:    retq
  %result = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x,
                                               metadata !"fpexcept.strict") #0
  ret i32 %result
}

; Strict (fpexcept.strict) signed conversion of a half to i64.
; Expected lowering per run, as pinned by the assertions below:
;   - plain SSE2 widens to float through the __extendhfsf2 libcall, then
;     truncates with the 64-bit form of cvttss2si (destination rax);
;   - the F16C and AVX512F runs widen with vcvtph2ps, then use the 64-bit
;     vcvttss2si;
;   - the 64-bit AVX512-FP16 run converts directly with vcvttsh2si into rax;
;   - the i686 AVX512-FP16 run loads the half with vmovsh, converts with
;     vcvttph2qq, and returns the two halves of the result in eax (vmovd) and
;     edx (vpextrd $1).
define i64 @fptosi_f16toi64(half %x) #0 {
; SSE2-LABEL: fptosi_f16toi64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    cvttss2si %xmm0, %rax
; SSE2-NEXT:    popq %rcx
; SSE2-NEXT:    retq
;
; AVX-LABEL: fptosi_f16toi64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrw $0, %xmm0, %eax
; AVX-NEXT:    movzwl %ax, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    retq
;
; X86-LABEL: fptosi_f16toi64:
; X86:       # %bb.0:
; X86-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    vcvttph2qq %xmm0, %xmm0
; X86-NEXT:    vmovd %xmm0, %eax
; X86-NEXT:    vpextrd $1, %xmm0, %edx
; X86-NEXT:    retl
;
; X64-LABEL: fptosi_f16toi64:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2si %xmm0, %rax
; X64-NEXT:    retq
  %result = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x,
                                               metadata !"fpexcept.strict") #0
  ret i64 %result
}

; Strict (fpexcept.strict) unsigned conversion of a half to i1.
; The assertions below pin the same instruction sequences as the signed i1
; case: every run ends in a signed truncating convert to a 32-bit register
; ((v)cvttss2si after widening to float, or vcvttsh2si on the AVX512-FP16
; runs), and only the low byte (al) is returned.
define i1 @fptoui_f16toi1(half %x) #0 {
; SSE2-LABEL: fptoui_f16toi1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    cvttss2si %xmm0, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    popq %rcx
; SSE2-NEXT:    retq
;
; AVX-LABEL: fptoui_f16toi1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrw $0, %xmm0, %eax
; AVX-NEXT:    movzwl %ax, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    vcvttss2si %xmm0, %eax
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
;
; X86-LABEL: fptoui_f16toi1:
; X86:       # %bb.0:
; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: fptoui_f16toi1:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2si %xmm0, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %result = call i1 @llvm.experimental.constrained.fptoui.i1.f16(half %x,
                                               metadata !"fpexcept.strict") #0
  ret i1 %result
}

; Strict (fpexcept.strict) unsigned conversion of a half to i8.
; The assertions below pin the same instruction sequences as the signed i8
; case: every run ends in a signed truncating convert to a 32-bit register
; ((v)cvttss2si after widening to float, or vcvttsh2si on the AVX512-FP16
; runs), and the i8 result is the low byte (al).
define i8 @fptoui_f16toi8(half %x) #0 {
; SSE2-LABEL: fptoui_f16toi8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    cvttss2si %xmm0, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    popq %rcx
; SSE2-NEXT:    retq
;
; AVX-LABEL: fptoui_f16toi8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrw $0, %xmm0, %eax
; AVX-NEXT:    movzwl %ax, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    vcvttss2si %xmm0, %eax
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
;
; X86-LABEL: fptoui_f16toi8:
; X86:       # %bb.0:
; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: fptoui_f16toi8:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2si %xmm0, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %result = call i8 @llvm.experimental.constrained.fptoui.i8.f16(half %x,
                                               metadata !"fpexcept.strict") #0
  ret i8 %result
}

; Strict (fpexcept.strict) unsigned conversion of a half to i16.
; The assertions below pin the same instruction sequences as the signed i16
; case: every run ends in a signed truncating convert to a 32-bit register
; ((v)cvttss2si after widening to float, or vcvttsh2si on the AVX512-FP16
; runs), and the i16 result is the low word (ax).
define i16 @fptoui_f16toi16(half %x) #0 {
; SSE2-LABEL: fptoui_f16toi16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    cvttss2si %xmm0, %eax
; SSE2-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-NEXT:    popq %rcx
; SSE2-NEXT:    retq
;
; AVX-LABEL: fptoui_f16toi16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpextrw $0, %xmm0, %eax
; AVX-NEXT:    movzwl %ax, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    vcvttss2si %xmm0, %eax
; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX-NEXT:    retq
;
; X86-LABEL: fptoui_f16toi16:
; X86:       # %bb.0:
; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: fptoui_f16toi16:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2si %xmm0, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
  %result = call i16 @llvm.experimental.constrained.fptoui.i16.f16(half %x,
                                               metadata !"fpexcept.strict") #0
  ret i16 %result
}

; Strict (fpexcept.strict) unsigned conversion of a half to i32.
; This is the first case where the F16C and AVX512F runs diverge, so they are
; checked under their own prefixes instead of the shared AVX one:
;   - plain SSE2 (after the __extendhfsf2 libcall) and F16C (after vcvtph2ps)
;     use the 64-bit signed (v)cvttss2si and return the low 32 bits (eax);
;   - the AVX512F run widens with vcvtph2ps, then uses vcvttss2usi;
;   - both AVX512-FP16 runs convert directly with vcvttsh2usi.
define i32 @fptoui_f16toi32(half %x) #0 {
; SSE2-LABEL: fptoui_f16toi32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    cvttss2si %xmm0, %rax
; SSE2-NEXT:    # kill: def $eax killed $eax killed $rax
; SSE2-NEXT:    popq %rcx
; SSE2-NEXT:    retq
;
; F16C-LABEL: fptoui_f16toi32:
; F16C:       # %bb.0:
; F16C-NEXT:    vpextrw $0, %xmm0, %eax
; F16C-NEXT:    movzwl %ax, %eax
; F16C-NEXT:    vmovd %eax, %xmm0
; F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; F16C-NEXT:    vcvttss2si %xmm0, %rax
; F16C-NEXT:    # kill: def $eax killed $eax killed $rax
; F16C-NEXT:    retq
;
; AVX512-LABEL: fptoui_f16toi32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpextrw $0, %xmm0, %eax
; AVX512-NEXT:    movzwl %ax, %eax
; AVX512-NEXT:    vmovd %eax, %xmm0
; AVX512-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT:    vcvttss2usi %xmm0, %eax
; AVX512-NEXT:    retq
;
; X86-LABEL: fptoui_f16toi32:
; X86:       # %bb.0:
; X86-NEXT:    vcvttsh2usi {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: fptoui_f16toi32:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2usi %xmm0, %eax
; X64-NEXT:    retq
  %result = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x,
                                               metadata !"fpexcept.strict") #0
  ret i32 %result
}

; Strict (fpexcept.strict) unsigned conversion of a half to i64.
; Expected lowering per run, as pinned by the assertions below:
;   - the plain SSE2 and F16C runs widen to float (libcall / vcvtph2ps), compare
;     the value against the constant 9.22337203E+18, subtract that constant
;     when the value is not below it (otherwise subtract zero), convert with
;     the signed 64-bit (v)cvttss2si, and finally XOR (setae << 63) into the
;     converted value;
;   - the AVX512F run widens with vcvtph2ps, then uses the 64-bit vcvttss2usi;
;   - the 64-bit AVX512-FP16 run converts directly with vcvttsh2usi into rax;
;   - the i686 AVX512-FP16 run loads the half with vmovsh, converts with
;     vcvttph2uqq, and returns the two halves of the result in eax (vmovd) and
;     edx (vpextrd $1).
define i64 @fptoui_f16toi64(half %x) #0 {
; SSE2-LABEL: fptoui_f16toi64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; SSE2-NEXT:    comiss %xmm2, %xmm0
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    jb .LBB9_2
; SSE2-NEXT:  # %bb.1:
; SSE2-NEXT:    movaps %xmm2, %xmm1
; SSE2-NEXT:  .LBB9_2:
; SSE2-NEXT:    subss %xmm1, %xmm0
; SSE2-NEXT:    cvttss2si %xmm0, %rcx
; SSE2-NEXT:    setae %al
; SSE2-NEXT:    movzbl %al, %eax
; SSE2-NEXT:    shlq $63, %rax
; SSE2-NEXT:    xorq %rcx, %rax
; SSE2-NEXT:    popq %rcx
; SSE2-NEXT:    retq
;
; F16C-LABEL: fptoui_f16toi64:
; F16C:       # %bb.0:
; F16C-NEXT:    vpextrw $0, %xmm0, %eax
; F16C-NEXT:    movzwl %ax, %eax
; F16C-NEXT:    vmovd %eax, %xmm0
; F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; F16C-NEXT:    vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; F16C-NEXT:    vcomiss %xmm1, %xmm0
; F16C-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; F16C-NEXT:    jb .LBB9_2
; F16C-NEXT:  # %bb.1:
; F16C-NEXT:    vmovaps %xmm1, %xmm2
; F16C-NEXT:  .LBB9_2:
; F16C-NEXT:    vsubss %xmm2, %xmm0, %xmm0
; F16C-NEXT:    vcvttss2si %xmm0, %rcx
; F16C-NEXT:    setae %al
; F16C-NEXT:    movzbl %al, %eax
; F16C-NEXT:    shlq $63, %rax
; F16C-NEXT:    xorq %rcx, %rax
; F16C-NEXT:    retq
;
; AVX512-LABEL: fptoui_f16toi64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpextrw $0, %xmm0, %eax
; AVX512-NEXT:    movzwl %ax, %eax
; AVX512-NEXT:    vmovd %eax, %xmm0
; AVX512-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT:    vcvttss2usi %xmm0, %rax
; AVX512-NEXT:    retq
;
; X86-LABEL: fptoui_f16toi64:
; X86:       # %bb.0:
; X86-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    vcvttph2uqq %xmm0, %xmm0
; X86-NEXT:    vmovd %xmm0, %eax
; X86-NEXT:    vpextrd $1, %xmm0, %edx
; X86-NEXT:    retl
;
; X64-LABEL: fptoui_f16toi64:
; X64:       # %bb.0:
; X64-NEXT:    vcvttsh2usi %xmm0, %rax
; X64-NEXT:    retq
  %result = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x,
                                               metadata !"fpexcept.strict") #0
  ret i64 %result
}

; Attribute group applied to every test function and every constrained
; intrinsic call site in this file.
attributes #0 = { strictfp nounwind }