; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X86,X86-SLOW
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=CHECK,X86,X86-FAST-ALL
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=CHECK,X86,X86-FAST-PERLANE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64,X64-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=CHECK,X64,X64-FAST-ALL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=CHECK,X64,X64-FAST-PERLANE

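; trunc4: v4i64 -> v4i32. Slow-shuffle and fast-perlane targets split the
; vector with vextractf128 and merge the even dwords via vshufps; targets with
; fast cross-lane shuffles use a single vpermps through a broadcast index vector.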
define <4 x i32> @trunc4(<4 x i64> %A) nounwind {
; X86-SLOW-LABEL: trunc4:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-SLOW-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X86-SLOW-NEXT:    vzeroupper
; X86-SLOW-NEXT:    retl
;
; X86-FAST-ALL-LABEL: trunc4:
; X86-FAST-ALL:       # %bb.0:
; X86-FAST-ALL-NEXT:    vbroadcastf128 {{.*#+}} ymm1 = [0,2,4,6,0,2,4,6]
; X86-FAST-ALL-NEXT:    # ymm1 = mem[0,1,0,1]
; X86-FAST-ALL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; X86-FAST-ALL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; X86-FAST-ALL-NEXT:    vzeroupper
; X86-FAST-ALL-NEXT:    retl
;
; X86-FAST-PERLANE-LABEL: trunc4:
; X86-FAST-PERLANE:       # %bb.0:
; X86-FAST-PERLANE-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-FAST-PERLANE-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X86-FAST-PERLANE-NEXT:    vzeroupper
; X86-FAST-PERLANE-NEXT:    retl
;
; X64-SLOW-LABEL: trunc4:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-SLOW-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-SLOW-NEXT:    vzeroupper
; X64-SLOW-NEXT:    retq
;
; X64-FAST-ALL-LABEL: trunc4:
; X64-FAST-ALL:       # %bb.0:
; X64-FAST-ALL-NEXT:    vbroadcastf128 {{.*#+}} ymm1 = [0,2,4,6,0,2,4,6]
; X64-FAST-ALL-NEXT:    # ymm1 = mem[0,1,0,1]
; X64-FAST-ALL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; X64-FAST-ALL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; X64-FAST-ALL-NEXT:    vzeroupper
; X64-FAST-ALL-NEXT:    retq
;
; X64-FAST-PERLANE-LABEL: trunc4:
; X64-FAST-PERLANE:       # %bb.0:
; X64-FAST-PERLANE-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-FAST-PERLANE-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-FAST-PERLANE-NEXT:    vzeroupper
; X64-FAST-PERLANE-NEXT:    retq
  %B = trunc <4 x i64> %A to <4 x i32>
  ret <4 x i32> %B
}

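; trunc8: v8i32 -> v8i16. All targets use an in-lane vpshufb to pack each
; 128-bit half, then a vpermq to concatenate the two low qwords.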
define <8 x i16> @trunc8(<8 x i32> %A) nounwind {
; CHECK-LABEL: trunc8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %B = trunc <8 x i32> %A to <8 x i16>
  ret <8 x i16> %B
}

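; sext4: v4i32 -> v4i64 sign extension maps directly to vpmovsxdq.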
define <4 x i64> @sext4(<4 x i32> %A) nounwind {
; CHECK-LABEL: sext4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxdq %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %B = sext <4 x i32> %A to <4 x i64>
  ret <4 x i64> %B
}

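; sext8: v8i16 -> v8i32 sign extension maps directly to vpmovsxwd.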
define <8 x i32> @sext8(<8 x i16> %A) nounwind {
; CHECK-LABEL: sext8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxwd %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %B = sext <8 x i16> %A to <8 x i32>
  ret <8 x i32> %B
}

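; zext4: v4i32 -> v4i64 zero extension maps directly to vpmovzxdq.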
define <4 x i64> @zext4(<4 x i32> %A) nounwind {
; CHECK-LABEL: zext4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK-NEXT:    ret{{[l|q]}}
  %B = zext <4 x i32> %A to <4 x i64>
  ret <4 x i64> %B
}

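; zext8: v8i16 -> v8i32 zero extension maps directly to vpmovzxwd.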
define <8 x i32> @zext8(<8 x i16> %A) nounwind {
; CHECK-LABEL: zext8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT:    ret{{[l|q]}}
  %B = zext <8 x i16> %A to <8 x i32>
  ret <8 x i32> %B
}

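; v8i8 -> v8i32 zero extension widens by four in a single vpmovzxbd.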
define <8 x i32> @zext_8i8_8i32(<8 x i8> %A) nounwind {
; CHECK-LABEL: zext_8i8_8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}}
  %B = zext <8 x i8> %A to <8 x i32>
  ret <8 x i32> %B
}

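; v16i8 -> v16i16 zero extension uses a single vpmovzxbw.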
define <16 x i16> @zext_16i8_16i16(<16 x i8> %z) {
; CHECK-LABEL: zext_16i8_16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; CHECK-NEXT:    ret{{[l|q]}}
  %t = zext <16 x i8> %z to <16 x i16>
  ret <16 x i16> %t
}

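; v16i8 -> v16i16 sign extension uses a single vpmovsxbw.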
define <16 x i16> @sext_16i8_16i16(<16 x i8> %z) {
; CHECK-LABEL: sext_16i8_16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %t = sext <16 x i8> %z to <16 x i16>
  ret <16 x i16> %t
}

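; v16i16 -> v16i8 truncation: mask each word down to its low byte with vpand,
; then pack the two 128-bit halves with vpackuswb. X86 and X64 differ only in
; how the constant-pool mask is addressed.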
define <16 x i8> @trunc_16i16_16i8(<16 x i16> %z) {
; X86-LABEL: trunc_16i16_16i8:
; X86:       # %bb.0:
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: trunc_16i16_16i8:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %t = trunc <16 x i16> %z to <16 x i8>
  ret <16 x i8> %t
}

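; The load_sext tests check that a sign-extending load folds into a single
; vpmovsx* instruction with a memory operand (X86 first reloads the pointer
; argument from the stack).
; load_sext_test1: <4 x i32> load + sext to <4 x i64> -> vpmovsxdq from memory.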
define <4 x i64> @load_sext_test1(ptr %ptr) {
; X86-LABEL: load_sext_test1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmovsxdq (%eax), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: load_sext_test1:
; X64:       # %bb.0:
; X64-NEXT:    vpmovsxdq (%rdi), %ymm0
; X64-NEXT:    retq
  %X = load <4 x i32>, ptr %ptr
  %Y = sext <4 x i32> %X to <4 x i64>
  ret <4 x i64> %Y
}

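; load_sext_test2: <4 x i8> load + sext to <4 x i64> -> vpmovsxbq from memory.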
define <4 x i64> @load_sext_test2(ptr %ptr) {
; X86-LABEL: load_sext_test2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmovsxbq (%eax), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: load_sext_test2:
; X64:       # %bb.0:
; X64-NEXT:    vpmovsxbq (%rdi), %ymm0
; X64-NEXT:    retq
  %X = load <4 x i8>, ptr %ptr
  %Y = sext <4 x i8> %X to <4 x i64>
  ret <4 x i64> %Y
}

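; load_sext_test3: <4 x i16> load + sext to <4 x i64> -> vpmovsxwq from memory.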
define <4 x i64> @load_sext_test3(ptr %ptr) {
; X86-LABEL: load_sext_test3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmovsxwq (%eax), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: load_sext_test3:
; X64:       # %bb.0:
; X64-NEXT:    vpmovsxwq (%rdi), %ymm0
; X64-NEXT:    retq
  %X = load <4 x i16>, ptr %ptr
  %Y = sext <4 x i16> %X to <4 x i64>
  ret <4 x i64> %Y
}

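; load_sext_test4: <8 x i16> load + sext to <8 x i32> -> vpmovsxwd from memory.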
define <8 x i32> @load_sext_test4(ptr %ptr) {
; X86-LABEL: load_sext_test4:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmovsxwd (%eax), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: load_sext_test4:
; X64:       # %bb.0:
; X64-NEXT:    vpmovsxwd (%rdi), %ymm0
; X64-NEXT:    retq
  %X = load <8 x i16>, ptr %ptr
  %Y = sext <8 x i16> %X to <8 x i32>
  ret <8 x i32> %Y
}

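; load_sext_test5: <8 x i8> load + sext to <8 x i32> -> vpmovsxbd from memory.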
define <8 x i32> @load_sext_test5(ptr %ptr) {
; X86-LABEL: load_sext_test5:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmovsxbd (%eax), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: load_sext_test5:
; X64:       # %bb.0:
; X64-NEXT:    vpmovsxbd (%rdi), %ymm0
; X64-NEXT:    retq
  %X = load <8 x i8>, ptr %ptr
  %Y = sext <8 x i8> %X to <8 x i32>
  ret <8 x i32> %Y
}
233