; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X86-AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64-AVX

; PR29079
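; The masked elements all fit in 31 bits, so the unsigned conversions below can be
; lowered to the signed cvtdq2ps/cvtdq2pd instructions.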
define <4 x float> @mask_ucvt_4i32_4f32(<4 x i32> %a) {
; X86-SSE-LABEL: mask_ucvt_4i32_4f32:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: mask_ucvt_4i32_4f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: mask_ucvt_4i32_4f32:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: mask_ucvt_4i32_4f32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %and = and <4 x i32> %a, <i32 127, i32 255, i32 4095, i32 65595>
  %cvt = uitofp <4 x i32> %and to <4 x float>
  ret <4 x float> %cvt
}

define <4 x double> @mask_ucvt_4i32_4f64(<4 x i32> %a) {
; X86-SSE-LABEL: mask_ucvt_4i32_4f64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT:    cvtdq2pd %xmm0, %xmm2
; X86-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X86-SSE-NEXT:    cvtdq2pd %xmm0, %xmm1
; X86-SSE-NEXT:    movaps %xmm2, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: mask_ucvt_4i32_4f64:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: mask_ucvt_4i32_4f64:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE-NEXT:    cvtdq2pd %xmm0, %xmm2
; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-SSE-NEXT:    cvtdq2pd %xmm0, %xmm1
; X64-SSE-NEXT:    movaps %xmm2, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: mask_ucvt_4i32_4f64:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X64-AVX-NEXT:    retq
  %and = and <4 x i32> %a, <i32 127, i32 255, i32 4095, i32 65595>
  %cvt = uitofp <4 x i32> %and to <4 x double>
  ret <4 x double> %cvt
}

; Regression noticed in D56387
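; The shift and mask only use bits 16-31 of each i64 element, so the lowering can
; gather the low dwords, shift with psrld, and convert with cvtdq2ps.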
define <4 x float> @lshr_truncate_mask_ucvt_4i64_4f32(ptr %p0) {
; X86-SSE-LABEL: lshr_truncate_mask_ucvt_4i64_4f32:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movups (%eax), %xmm0
; X86-SSE-NEXT:    movups 16(%eax), %xmm1
; X86-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X86-SSE-NEXT:    psrld $16, %xmm0
; X86-SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; X86-SSE-NEXT:    mulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: lshr_truncate_mask_ucvt_4i64_4f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovups (%eax), %xmm0
; X86-AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],mem[0,2]
; X86-AVX-NEXT:    vpsrld $16, %xmm0, %xmm0
; X86-AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-AVX-NEXT:    vmulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: lshr_truncate_mask_ucvt_4i64_4f32:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movups (%rdi), %xmm0
; X64-SSE-NEXT:    movups 16(%rdi), %xmm1
; X64-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-SSE-NEXT:    psrld $16, %xmm0
; X64-SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; X64-SSE-NEXT:    mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: lshr_truncate_mask_ucvt_4i64_4f32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovups (%rdi), %xmm0
; X64-AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],mem[0,2]
; X64-AVX-NEXT:    vpsrld $16, %xmm0, %xmm0
; X64-AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX-NEXT:    vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %load = load <4 x i64>, ptr %p0, align 2
  %lshr = lshr <4 x i64> %load, <i64 16, i64 16, i64 16, i64 16>
  %and = and <4 x i64> %lshr, <i64 65535, i64 65535, i64 65535, i64 65535>
  %uitofp = uitofp <4 x i64> %and to <4 x float>
  %fmul = fmul <4 x float> %uitofp, <float 0x3EF0001000000000, float 0x3EF0001000000000, float 0x3EF0001000000000, float 0x3EF0001000000000>
  ret <4 x float> %fmul
}