xref: /llvm-project/llvm/test/CodeGen/X86/mulo-pow2.ll (revision 5a2a14db0bc4cfd4f3c8f2fbac7ca9bc93a23699)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
3
; Vector multiply-with-overflow intrinsics exercised by every test below.
; Each returns a { <4 x i32>, <4 x i1> } pair; the tests extract element 0
; (used as the product) and element 1 (used as the per-lane select condition).
4declare { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32>, <4 x i32>)
5declare { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32>, <4 x i32>)
6
; Unsigned multiply-with-overflow of %a by an all-zero vector.
; The body selects %b in lanes where the intrinsic's i1 result is set and the
; product otherwise. The expected assembly is only a register-zeroing vxorps
; followed by the return, i.e. the call and the select are folded away.
7define <4 x i32> @umul_v4i32_0(<4 x i32> %a, <4 x i32> %b) nounwind {
8; AVX-LABEL: umul_v4i32_0:
9; AVX:       # %bb.0:
10; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
11; AVX-NEXT:    retq
12    %x = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> %a, <4 x i32> zeroinitializer)
13    %y = extractvalue { <4 x i32>, <4 x i1> } %x, 0
14    %z = extractvalue { <4 x i32>, <4 x i1> } %x, 1
15    %u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y
16    ret <4 x i32> %u
17}
18
; Unsigned multiply-with-overflow of %a by a splat of 1.
; The body selects %b in lanes where the intrinsic's i1 result is set and the
; product otherwise. The expected assembly is a bare return with no other
; instructions, i.e. the whole computation folds away.
; (Presumably %a is already in the return register - TODO confirm against ABI.)
19define <4 x i32> @umul_v4i32_1(<4 x i32> %a, <4 x i32> %b) nounwind {
20; AVX-LABEL: umul_v4i32_1:
21; AVX:       # %bb.0:
22; AVX-NEXT:    retq
23    %x = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> %a, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
24    %y = extractvalue { <4 x i32>, <4 x i1> } %x, 0
25    %z = extractvalue { <4 x i32>, <4 x i1> } %x, 1
26    %u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y
27    ret <4 x i32> %u
28}
29
; Unsigned multiply-with-overflow of %a by a splat of 2.
; The body selects %b in lanes where the intrinsic's i1 result is set and the
; product otherwise. The expected assembly contains no multiply: the product
; is formed by a vpaddd of a register with itself, the blend mask comes from
; vpmaxud + vpcmpeqd, and a vblendvps implements the select.
30define <4 x i32> @umul_v4i32_2(<4 x i32> %a, <4 x i32> %b) nounwind {
31; AVX-LABEL: umul_v4i32_2:
32; AVX:       # %bb.0:
33; AVX-NEXT:    vpaddd %xmm0, %xmm0, %xmm2
34; AVX-NEXT:    vpmaxud %xmm0, %xmm2, %xmm0
35; AVX-NEXT:    vpcmpeqd %xmm0, %xmm2, %xmm0
36; AVX-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
37; AVX-NEXT:    retq
38    %x = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> %a, <4 x i32> <i32 2, i32 2, i32 2, i32 2>)
39    %y = extractvalue { <4 x i32>, <4 x i1> } %x, 0
40    %z = extractvalue { <4 x i32>, <4 x i1> } %x, 1
41    %u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y
42    ret <4 x i32> %u
43}
44
; Unsigned multiply-with-overflow of %a by a splat of 8 (2^3).
; The body selects %b in lanes where the intrinsic's i1 result is set and the
; product otherwise. The expected assembly contains no multiply: it shifts
; left by 3, shifts the result back right by 3 with a logical shift (vpsrld),
; compares that round trip against the original input, and uses the compare
; result as the vblendvps mask.
45define <4 x i32> @umul_v4i32_8(<4 x i32> %a, <4 x i32> %b) nounwind {
46; AVX-LABEL: umul_v4i32_8:
47; AVX:       # %bb.0:
48; AVX-NEXT:    vpslld $3, %xmm0, %xmm2
49; AVX-NEXT:    vpsrld $3, %xmm2, %xmm3
50; AVX-NEXT:    vpcmpeqd %xmm0, %xmm3, %xmm0
51; AVX-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
52; AVX-NEXT:    retq
53    %x = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> %a, <4 x i32> <i32 8, i32 8, i32 8, i32 8>)
54    %y = extractvalue { <4 x i32>, <4 x i1> } %x, 0
55    %z = extractvalue { <4 x i32>, <4 x i1> } %x, 1
56    %u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y
57    ret <4 x i32> %u
58}
59
; Unsigned multiply-with-overflow of %a by a splat of 2147483648 (2^31), the
; largest power of two representable in an i32 lane.
; The body selects %b in lanes where the intrinsic's i1 result is set and the
; product otherwise. The expected assembly has the same shape as the
; multiply-by-8 case, with a shift amount of 31: shift left, logical shift
; right, compare with the original input, then vblendvps.
60define <4 x i32> @umul_v4i32_2pow31(<4 x i32> %a, <4 x i32> %b) nounwind {
61; AVX-LABEL: umul_v4i32_2pow31:
62; AVX:       # %bb.0:
63; AVX-NEXT:    vpslld $31, %xmm0, %xmm2
64; AVX-NEXT:    vpsrld $31, %xmm2, %xmm3
65; AVX-NEXT:    vpcmpeqd %xmm0, %xmm3, %xmm0
66; AVX-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
67; AVX-NEXT:    retq
68    %x = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> %a, <4 x i32> <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>)
69    %y = extractvalue { <4 x i32>, <4 x i1> } %x, 0
70    %z = extractvalue { <4 x i32>, <4 x i1> } %x, 1
71    %u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y
72    ret <4 x i32> %u
73}
74
; Signed multiply-with-overflow of %a by an all-zero vector.
; The body selects %b in lanes where the intrinsic's i1 result is set and the
; product otherwise. The expected assembly is only a register-zeroing vxorps
; followed by the return, identical to the unsigned multiply-by-zero test.
75define <4 x i32> @smul_v4i32_0(<4 x i32> %a, <4 x i32> %b) nounwind {
76; AVX-LABEL: smul_v4i32_0:
77; AVX:       # %bb.0:
78; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
79; AVX-NEXT:    retq
80    %x = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> %a, <4 x i32> zeroinitializer)
81    %y = extractvalue { <4 x i32>, <4 x i1> } %x, 0
82    %z = extractvalue { <4 x i32>, <4 x i1> } %x, 1
83    %u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y
84    ret <4 x i32> %u
85}
86
; Signed multiply-with-overflow of %a by a splat of 1.
; The body selects %b in lanes where the intrinsic's i1 result is set and the
; product otherwise. The expected assembly is a bare return with no other
; instructions, identical to the unsigned multiply-by-one test.
87define <4 x i32> @smul_v4i32_1(<4 x i32> %a, <4 x i32> %b) nounwind {
88; AVX-LABEL: smul_v4i32_1:
89; AVX:       # %bb.0:
90; AVX-NEXT:    retq
91    %x = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> %a, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
92    %y = extractvalue { <4 x i32>, <4 x i1> } %x, 0
93    %z = extractvalue { <4 x i32>, <4 x i1> } %x, 1
94    %u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y
95    ret <4 x i32> %u
96}
97
; Signed multiply-with-overflow of %a by a splat of 2.
; The body selects %b in lanes where the intrinsic's i1 result is set and the
; product otherwise. The expected assembly contains no multiply: the product
; is formed by a vpaddd of a register with itself, the blend mask comes from a
; signed compare (vpcmpgtd) xor'ed with the input, and a vblendvps implements
; the select. Note that the two data operands of the vblendvps appear in the
; opposite order from the unsigned multiply-by-2 test.
98define <4 x i32> @smul_v4i32_2(<4 x i32> %a, <4 x i32> %b) nounwind {
99; AVX-LABEL: smul_v4i32_2:
100; AVX:       # %bb.0:
101; AVX-NEXT:    vpaddd %xmm0, %xmm0, %xmm2
102; AVX-NEXT:    vpcmpgtd %xmm2, %xmm0, %xmm3
103; AVX-NEXT:    vpxor %xmm3, %xmm0, %xmm0
104; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
105; AVX-NEXT:    retq
106    %x = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> %a, <4 x i32> <i32 2, i32 2, i32 2, i32 2>)
107    %y = extractvalue { <4 x i32>, <4 x i1> } %x, 0
108    %z = extractvalue { <4 x i32>, <4 x i1> } %x, 1
109    %u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y
110    ret <4 x i32> %u
111}
112
; Signed multiply-with-overflow of %a by a splat of 8 (2^3).
; The body selects %b in lanes where the intrinsic's i1 result is set and the
; product otherwise. The expected assembly contains no multiply: it shifts
; left by 3, shifts the result back right by 3 with an arithmetic shift
; (vpsrad, where the unsigned variant of this test uses vpsrld), compares that
; round trip against the original input, and uses the compare result as the
; vblendvps mask.
113define <4 x i32> @smul_v4i32_8(<4 x i32> %a, <4 x i32> %b) nounwind {
114; AVX-LABEL: smul_v4i32_8:
115; AVX:       # %bb.0:
116; AVX-NEXT:    vpslld $3, %xmm0, %xmm2
117; AVX-NEXT:    vpsrad $3, %xmm2, %xmm3
118; AVX-NEXT:    vpcmpeqd %xmm0, %xmm3, %xmm0
119; AVX-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
120; AVX-NEXT:    retq
121    %x = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> %a, <4 x i32> <i32 8, i32 8, i32 8, i32 8>)
122    %y = extractvalue { <4 x i32>, <4 x i1> } %x, 0
123    %z = extractvalue { <4 x i32>, <4 x i1> } %x, 1
124    %u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y
125    ret <4 x i32> %u
126}
127
; Signed multiply-with-overflow of %a by a splat of -2147483648, the minimum
; signed i32 value (bit pattern 2^31).
; The body selects %b in lanes where the intrinsic's i1 result is set and the
; product otherwise. The expected assembly is instruction-for-instruction the
; same as the unsigned 2^31 test: shift left by 31, *logical* shift right by
; 31 (vpsrld, not the vpsrad used by the signed multiply-by-8 test), compare
; with the original input, then vblendvps.
; (Presumably intentional: with a logical shift the round trip matches only
; for inputs 0 and 1, the only values whose product with the signed minimum
; fits in i32 - TODO confirm against the lowering code.)
128define <4 x i32> @smul_v4i32_2pow31(<4 x i32> %a, <4 x i32> %b) nounwind {
129; AVX-LABEL: smul_v4i32_2pow31:
130; AVX:       # %bb.0:
131; AVX-NEXT:    vpslld $31, %xmm0, %xmm2
132; AVX-NEXT:    vpsrld $31, %xmm2, %xmm3
133; AVX-NEXT:    vpcmpeqd %xmm0, %xmm3, %xmm0
134; AVX-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
135; AVX-NEXT:    retq
136    %x = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> %a, <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>)
137    %y = extractvalue { <4 x i32>, <4 x i1> } %x, 0
138    %z = extractvalue { <4 x i32>, <4 x i1> } %x, 1
139    %u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y
140    ret <4 x i32> %u
141}
142