; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64

; add <4 x i64> selects a single 256-bit vpaddq.
define <4 x i64> @test_vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
; CHECK-LABEL: test_vpaddq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}

; add <8 x i32> selects a single 256-bit vpaddd.
define <8 x i32> @test_vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
; CHECK-LABEL: test_vpaddd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}

; add <16 x i16> selects a single 256-bit vpaddw.
define <16 x i16> @test_vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
; CHECK-LABEL: test_vpaddw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}

; add <32 x i8> selects a single 256-bit vpaddb.
define <32 x i8> @test_vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; CHECK-LABEL: test_vpaddb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}

; sub <4 x i64> selects a single 256-bit vpsubq.
define <4 x i64> @test_vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
; CHECK-LABEL: test_vpsubq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}

; sub <8 x i32> selects a single 256-bit vpsubd.
define <8 x i32> @test_vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
; CHECK-LABEL: test_vpsubd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}

; sub <16 x i16> selects a single 256-bit vpsubw.
define <16 x i16> @test_vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
; CHECK-LABEL: test_vpsubw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}

; sub <32 x i8> selects a single 256-bit vpsubb.
define <32 x i8> @test_vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; CHECK-LABEL: test_vpsubb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}

; mul <8 x i32> selects a single 256-bit vpmulld.
define <8 x i32> @test_vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
; CHECK-LABEL: test_vpmulld:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmulld %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = mul <8 x i32> %i, %j
  ret <8 x i32> %x
}

; mul <16 x i16> selects a single 256-bit vpmullw.
define <16 x i16> @test_vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
; CHECK-LABEL: test_vpmullw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}

; AVX2 has no byte multiply, so v16i8 mul is widened: zero-extend both
; operands to <16 x i16> in a ymm, vpmullw, mask the low bytes, and pack
; back down to xmm. X86/X64 differ only in the vpand constant addressing.
define <16 x i8> @mul_v16i8(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
; X86-LABEL: mul_v16i8:
; X86:       # %bb.0:
; X86-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; X86-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; X86-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: mul_v16i8:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; X64-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; X64-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %x = mul <16 x i8> %i, %j
  ret <16 x i8> %x
}

; v32i8 mul is expanded with vpmaddubsw on the even and odd byte lanes
; (selected via a 0x00FF mask), then recombined with vpsllw/vpor.
define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; CHECK-LABEL: mul_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; CHECK-NEXT:    vpand %ymm2, %ymm1, %ymm3
; CHECK-NEXT:    vpmaddubsw %ymm3, %ymm0, %ymm3
; CHECK-NEXT:    vpand %ymm2, %ymm3, %ymm3
; CHECK-NEXT:    vpandn %ymm1, %ymm2, %ymm1
; CHECK-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpsllw $8, %ymm0, %ymm0
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = mul <32 x i8> %i, %j
  ret <32 x i8> %x
}

; AVX2 has no 64-bit element multiply; v4i64 mul is expanded into three
; vpmuludq partial products combined with shifts and adds.
define <4 x i64> @mul_v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
; CHECK-LABEL: mul_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlq $32, %ymm0, %ymm2
; CHECK-NEXT:    vpmuludq %ymm1, %ymm2, %ymm2
; CHECK-NEXT:    vpsrlq $32, %ymm1, %ymm3
; CHECK-NEXT:    vpmuludq %ymm3, %ymm0, %ymm3
; CHECK-NEXT:    vpaddq %ymm2, %ymm3, %ymm2
; CHECK-NEXT:    vpsllq $32, %ymm2, %ymm2
; CHECK-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = mul <4 x i64> %i, %j
  ret <4 x i64> %x
}

; mul by splat 2 folds to an add of the value with itself.
define <8 x i32> @mul_const1(<8 x i32> %x) {
; CHECK-LABEL: mul_const1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddd %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %y = mul <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %y
}

; mul by splat 4 folds to a left shift by 2.
define <4 x i64> @mul_const2(<4 x i64> %x) {
; CHECK-LABEL: mul_const2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllq $2, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %y = mul <4 x i64> %x, <i64 4, i64 4, i64 4, i64 4>
  ret <4 x i64> %y
}

; mul by splat 8 folds to a left shift by 3.
define <16 x i16> @mul_const3(<16 x i16> %x) {
; CHECK-LABEL: mul_const3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw $3, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %y = mul <16 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <16 x i16> %y
}

; mul by splat -1 folds to a negate (subtract from zero).
define <4 x i64> @mul_const4(<4 x i64> %x) {
; CHECK-LABEL: mul_const4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vpsubq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %y = mul <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %y
}

; mul by splat 0 folds to a zero vector.
define <8 x i32> @mul_const5(<8 x i32> %x) {
; CHECK-LABEL: mul_const5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %y
}

; Non-uniform constant (mostly zeros, some 2s) stays a vpmulld with a
; constant-pool operand; X86/X64 differ only in memory addressing.
define <8 x i32> @mul_const6(<8 x i32> %x) {
; X86-LABEL: mul_const6:
; X86:       # %bb.0:
; X86-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_const6:
; X64:       # %bb.0:
; X64-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
  %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 2, i32 0, i32 2, i32 0, i32 0>
  ret <8 x i32> %y
}

; v8i64 (wider than one ymm) mul by splat 2 splits into two vpaddq halves.
define <8 x i64> @mul_const7(<8 x i64> %x) {
; CHECK-LABEL: mul_const7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    vpaddq %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    ret{{[l|q]}}
  %y = mul <8 x i64> %x, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %y
}

; 128-bit v8i16 mul by splat 8 folds to an xmm left shift by 3.
define <8 x i16> @mul_const8(<8 x i16> %x) {
; CHECK-LABEL: mul_const8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw $3, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %y = mul <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <8 x i16> %y
}

; Only lane 0 multiplied by 2: constant <2,0,...,0> is materialized and
; multiplied with vpmulld rather than folded to a shift.
define <8 x i32> @mul_const9(<8 x i32> %x) {
; CHECK-LABEL: mul_const9:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbq {{.*#+}} xmm1 = [2,0]
; CHECK-NEXT:    vpmulld %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %y = mul <8 x i32> %x, <i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %y
}

; ptr 0x01010101
; Splat 0x01010101 multiplier: broadcast the constant and vpmulld.
define <4 x i32> @mul_const10(<4 x i32> %x) {
; CHECK-LABEL: mul_const10:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [16843009,16843009,16843009,16843009]
; CHECK-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %m = mul <4 x i32> %x, <i32 16843009, i32 16843009, i32 16843009, i32 16843009>
  ret <4 x i32> %m
}

; ptr 0x80808080
; Splat 0x80808080 multiplier: broadcast the constant and vpmulld.
define <4 x i32> @mul_const11(<4 x i32> %x) {
; CHECK-LABEL: mul_const11:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2155905152,2155905152,2155905152,2155905152]
; CHECK-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %m = mul <4 x i32> %x, <i32 2155905152, i32 2155905152, i32 2155905152, i32 2155905152>
  ret <4 x i32> %m
}
