; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX

; Test multiplies of various narrow types.

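; <2 x i8> is widened to a full XMM vector during legalization. Plain SSE2
; multiplies through 16-bit lanes (punpcklbw + pmullw), then masks and packs
; back to bytes; SSE4.1/AVX zero-extend with pmovzxbw and truncate the pmullw
; result with a single pshufb.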
define <2 x i8> @mul_v2i8(<2 x i8> %x, <2 x i8> %y) {
; SSE2-LABEL: mul_v2i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pmullw %xmm1, %xmm0
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: mul_v2i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT:    pmullw %xmm1, %xmm0
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; SSE41-NEXT:    retq
;
; AVX-LABEL: mul_v2i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT:    retq
  %res = mul <2 x i8> %x, %y
  ret <2 x i8> %res
}

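; <4 x i8> follows the same extend/pmullw/truncate pattern as v2i8; only the
; final truncating shuffle differs, keeping four result bytes instead of two.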
define <4 x i8> @mul_v4i8(<4 x i8> %x, <4 x i8> %y) {
; SSE2-LABEL: mul_v4i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pmullw %xmm1, %xmm0
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: mul_v4i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT:    pmullw %xmm1, %xmm0
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,u,u,u,u,u,u,u,u,u,u,u,u]
; SSE41-NEXT:    retq
;
; AVX-LABEL: mul_v4i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT:    retq
  %res = mul <4 x i8> %x, %y
  ret <4 x i8> %res
}

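; <8 x i8> fills all eight 16-bit lanes once extended, so one pmullw computes
; the whole product before it is shuffled back down to bytes.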
define <8 x i8> @mul_v8i8(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: mul_v8i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pmullw %xmm1, %xmm0
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: mul_v8i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT:    pmullw %xmm1, %xmm0
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE41-NEXT:    retq
;
; AVX-LABEL: mul_v8i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX-NEXT:    retq
  %res = mul <8 x i8> %x, %y
  ret <8 x i8> %res
}

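; i16 elements map directly onto pmullw lanes, so the widened v2i16 and v4i16
; multiplies lower to a single pmullw with no extend or truncate code.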
define <2 x i16> @mul_v2i16(<2 x i16> %x, <2 x i16> %y) {
; SSE-LABEL: mul_v2i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmullw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mul_v2i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %res = mul <2 x i16> %x, %y
  ret <2 x i16> %res
}

define <4 x i16> @mul_v4i16(<4 x i16> %x, <4 x i16> %y) {
; SSE-LABEL: mul_v4i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmullw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mul_v4i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %res = mul <4 x i16> %x, %y
  ret <4 x i16> %res
}

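; SSE2 has no 32-bit element multiply, so v2i32 is emulated with two pmuludq
; ops (even and odd lanes) plus shuffles to recombine the low halves; SSE4.1
; and AVX lower directly to pmulld/vpmulld.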
define <2 x i32> @mul_v2i32(<2 x i32> %x, <2 x i32> %y) {
; SSE2-LABEL: mul_v2i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: mul_v2i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmulld %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: mul_v2i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %res = mul <2 x i32> %x, %y
  ret <2 x i32> %res
}