xref: /llvm-project/llvm/test/Transforms/InstCombine/mul_fold.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=instcombine -S | FileCheck %s
3
4declare i8 @use8(i8)
5declare i16 @use16(i16)
6declare i32 @use32(i32)
7declare i64 @use64(i64)
8declare i128 @use128(i128)
9declare i130 @use130(i130)
10declare <2 x i8> @use_v2i8(<2 x i8>)
11
12; The following 16 cases cover the commuted operands of the ADD and MUL;
13; extra uses are added to some of these tests to exercise those cases.
14;  The different _Ax suffix hints the variety of combinations MUL
15;  The different _Bx suffix hints the variety of combinations ADD
16;  4 tests that use in0/in1 with different commutes
17define i8 @mul8_low_A0_B0(i8 %in0, i8 %in1) {
18; CHECK-LABEL: @mul8_low_A0_B0(
19; CHECK-NEXT:    [[RETLO:%.*]] = mul i8 [[IN0:%.*]], [[IN1:%.*]]
20; CHECK-NEXT:    ret i8 [[RETLO]]
21;
22  %In0Lo = and i8 %in0, 15
23  %In0Hi = lshr i8 %in0, 4
24  %In1Lo = and i8 %in1, 15
25  %In1Hi = lshr i8 %in1, 4
26  %m10 = mul i8 %In1Hi, %in0
27  %m01 = mul i8 %In0Hi, %in1
28  %m00 = mul i8 %In1Lo, %In0Lo
29  %addc = add i8 %m10, %m01
30  %shl = shl i8 %addc, 4
31  %retLo = add i8 %shl, %m00
32  ret i8 %retLo
33}
34
; A0/B1: %m10's operands are swapped vs A0_B0 and %retLo is commuted to
; (m00, shl); %in0 goes through @use8 to thwart operand canonicalization.
35define i8 @mul8_low_A0_B1(i8 %p, i8 %in1) {
36; CHECK-LABEL: @mul8_low_A0_B1(
37; CHECK-NEXT:    [[IN0:%.*]] = call i8 @use8(i8 [[P:%.*]])
38; CHECK-NEXT:    [[RETLO:%.*]] = mul i8 [[IN0]], [[IN1:%.*]]
39; CHECK-NEXT:    ret i8 [[RETLO]]
40;
41  %in0 = call i8 @use8(i8 %p) ; thwart complexity-based canonicalization
42  %In0Lo = and i8 %in0, 15
43  %In0Hi = lshr i8 %in0, 4
44  %In1Lo = and i8 %in1, 15
45  %In1Hi = lshr i8 %in1, 4
46  %m10 = mul i8 %in0, %In1Hi
47  %m01 = mul i8 %In0Hi, %in1
48  %m00 = mul i8 %In1Lo, %In0Lo
49  %addc = add i8 %m10, %m01
50  %shl = shl i8 %addc, 4
51  %retLo = add i8 %m00, %shl
52  ret i8 %retLo
53}
54
; A0/B2: %m01's operands are swapped and %addc is commuted to (m01, m10);
; %in1 goes through @use8 to thwart operand canonicalization.
55define i8 @mul8_low_A0_B2(i8 %in0, i8 %p) {
56; CHECK-LABEL: @mul8_low_A0_B2(
57; CHECK-NEXT:    [[IN1:%.*]] = call i8 @use8(i8 [[P:%.*]])
58; CHECK-NEXT:    [[RETLO:%.*]] = mul i8 [[IN0:%.*]], [[IN1]]
59; CHECK-NEXT:    ret i8 [[RETLO]]
60;
61
62  %in1 = call i8 @use8(i8 %p) ; thwart complexity-based canonicalization
63  %In0Lo = and i8 %in0, 15
64  %In0Hi = lshr i8 %in0, 4
65  %In1Lo = and i8 %in1, 15
66  %In1Hi = lshr i8 %in1, 4
67  %m10 = mul i8 %In1Hi, %in0
68  %m01 = mul i8 %in1, %In0Hi
69  %m00 = mul i8 %In1Lo, %In0Lo
70  %addc = add i8 %m01, %m10
71  %shl = shl i8 %addc, 4
72  %retLo = add i8 %shl, %m00
73  ret i8 %retLo
74}
75
; A0/B3: both %addc and %retLo are commuted; both inputs go through @use8
; to thwart operand canonicalization.
76define i8 @mul8_low_A0_B3(i8 %p, i8 %q) {
77; CHECK-LABEL: @mul8_low_A0_B3(
78; CHECK-NEXT:    [[IN0:%.*]] = call i8 @use8(i8 [[P:%.*]])
79; CHECK-NEXT:    [[IN1:%.*]] = call i8 @use8(i8 [[Q:%.*]])
80; CHECK-NEXT:    [[RETLO:%.*]] = mul i8 [[IN0]], [[IN1]]
81; CHECK-NEXT:    ret i8 [[RETLO]]
82;
83  %in0 = call i8 @use8(i8 %p) ; thwart complexity-based canonicalization
84  %in1 = call i8 @use8(i8 %q) ; thwart complexity-based canonicalization
85  %In0Lo = and i8 %in0, 15
86  %In0Hi = lshr i8 %in0, 4
87  %In1Lo = and i8 %in1, 15
88  %In1Hi = lshr i8 %in1, 4
89  %m10 = mul i8 %in0, %In1Hi
90  %m01 = mul i8 %in1, %In0Hi
91  %m00 = mul i8 %In1Lo, %In0Lo
92  %addc = add i8 %m01, %m10
93  %shl = shl i8 %addc, 4
94  %retLo = add i8 %m00, %shl
95  ret i8 %retLo
96}
97
98; 4 tests that use In0Lo/In1Lo with different commutes
; A1/B0: cross mults use the masked In0Lo/In1Lo halves; both %m10 and %m01
; have extra uses so they survive (with nuw inferred) while the rest of the
; pattern still folds to one mul.
99define i16 @mul16_low_A1_B0(i16 %in0, i16 %in1) {
100; CHECK-LABEL: @mul16_low_A1_B0(
101; CHECK-NEXT:    [[IN0LO:%.*]] = and i16 [[IN0:%.*]], 255
102; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i16 [[IN0]], 8
103; CHECK-NEXT:    [[IN1LO:%.*]] = and i16 [[IN1:%.*]], 255
104; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i16 [[IN1]], 8
105; CHECK-NEXT:    [[M10:%.*]] = mul nuw i16 [[IN0LO]], [[IN1HI]]
106; CHECK-NEXT:    call void @use16(i16 [[M10]])
107; CHECK-NEXT:    [[M01:%.*]] = mul nuw i16 [[IN1LO]], [[IN0HI]]
108; CHECK-NEXT:    call void @use16(i16 [[M01]])
109; CHECK-NEXT:    [[RETLO:%.*]] = mul i16 [[IN0]], [[IN1]]
110; CHECK-NEXT:    ret i16 [[RETLO]]
111;
112  %In0Lo = and i16 %in0, 255
113  %In0Hi = lshr i16 %in0, 8
114  %In1Lo = and i16 %in1, 255
115  %In1Hi = lshr i16 %in1, 8
116  %m10 = mul i16 %In0Lo, %In1Hi
117  call void @use16(i16 %m10)
118  %m01 = mul i16 %In1Lo, %In0Hi
119  call void @use16(i16 %m01)
120  %m00 = mul i16 %In1Lo, %In0Lo
121  %addc = add i16 %m10, %m01
122  %shl = shl i16 %addc, 8
123  %retLo = add i16 %shl, %m00
124  ret i16 %retLo
125}
126
; A1/B1: like A1_B0 but %m01's operands and %retLo are commuted.
127define i16 @mul16_low_A1_B1(i16 %in0, i16 %in1) {
128; CHECK-LABEL: @mul16_low_A1_B1(
129; CHECK-NEXT:    [[IN0LO:%.*]] = and i16 [[IN0:%.*]], 255
130; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i16 [[IN0]], 8
131; CHECK-NEXT:    [[IN1LO:%.*]] = and i16 [[IN1:%.*]], 255
132; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i16 [[IN1]], 8
133; CHECK-NEXT:    [[M10:%.*]] = mul nuw i16 [[IN0LO]], [[IN1HI]]
134; CHECK-NEXT:    call void @use16(i16 [[M10]])
135; CHECK-NEXT:    [[M01:%.*]] = mul nuw i16 [[IN0HI]], [[IN1LO]]
136; CHECK-NEXT:    call void @use16(i16 [[M01]])
137; CHECK-NEXT:    [[RETLO:%.*]] = mul i16 [[IN0]], [[IN1]]
138; CHECK-NEXT:    ret i16 [[RETLO]]
139;
140  %In0Lo = and i16 %in0, 255
141  %In0Hi = lshr i16 %in0, 8
142  %In1Lo = and i16 %in1, 255
143  %In1Hi = lshr i16 %in1, 8
144  %m10 = mul i16 %In0Lo, %In1Hi
145  call void @use16(i16 %m10)
146  %m01 = mul i16 %In0Hi, %In1Lo
147  call void @use16(i16 %m01)
148  %m00 = mul i16 %In1Lo, %In0Lo
149  %addc = add i16 %m10, %m01
150  %shl = shl i16 %addc, 8
151  %retLo = add i16 %m00, %shl
152  ret i16 %retLo
153}
154
; A1/B2: like A1_B0 but %m10's operands and %addc are commuted.
155define i16 @mul16_low_A1_B2(i16 %in0, i16 %in1) {
156; CHECK-LABEL: @mul16_low_A1_B2(
157; CHECK-NEXT:    [[IN0LO:%.*]] = and i16 [[IN0:%.*]], 255
158; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i16 [[IN0]], 8
159; CHECK-NEXT:    [[IN1LO:%.*]] = and i16 [[IN1:%.*]], 255
160; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i16 [[IN1]], 8
161; CHECK-NEXT:    [[M10:%.*]] = mul nuw i16 [[IN1HI]], [[IN0LO]]
162; CHECK-NEXT:    call void @use16(i16 [[M10]])
163; CHECK-NEXT:    [[M01:%.*]] = mul nuw i16 [[IN1LO]], [[IN0HI]]
164; CHECK-NEXT:    call void @use16(i16 [[M01]])
165; CHECK-NEXT:    [[RETLO:%.*]] = mul i16 [[IN0]], [[IN1]]
166; CHECK-NEXT:    ret i16 [[RETLO]]
167;
168  %In0Lo = and i16 %in0, 255
169  %In0Hi = lshr i16 %in0, 8
170  %In1Lo = and i16 %in1, 255
171  %In1Hi = lshr i16 %in1, 8
172  %m10 = mul i16 %In1Hi, %In0Lo
173  call void @use16(i16 %m10)
174  %m01 = mul i16 %In1Lo, %In0Hi
175  call void @use16(i16 %m01)
176  %m00 = mul i16 %In1Lo, %In0Lo
177  %addc = add i16 %m01, %m10
178  %shl = shl i16 %addc, 8
179  %retLo = add i16 %shl, %m00
180  ret i16 %retLo
181}
182
; A1/B3: like A1_B0 but both %addc and %retLo are commuted.
183define i16 @mul16_low_A1_B3(i16 %in0, i16 %in1) {
184; CHECK-LABEL: @mul16_low_A1_B3(
185; CHECK-NEXT:    [[IN0LO:%.*]] = and i16 [[IN0:%.*]], 255
186; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i16 [[IN0]], 8
187; CHECK-NEXT:    [[IN1LO:%.*]] = and i16 [[IN1:%.*]], 255
188; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i16 [[IN1]], 8
189; CHECK-NEXT:    [[M10:%.*]] = mul nuw i16 [[IN0LO]], [[IN1HI]]
190; CHECK-NEXT:    call void @use16(i16 [[M10]])
191; CHECK-NEXT:    [[M01:%.*]] = mul nuw i16 [[IN1LO]], [[IN0HI]]
192; CHECK-NEXT:    call void @use16(i16 [[M01]])
193; CHECK-NEXT:    [[RETLO:%.*]] = mul i16 [[IN0]], [[IN1]]
194; CHECK-NEXT:    ret i16 [[RETLO]]
195;
196  %In0Lo = and i16 %in0, 255
197  %In0Hi = lshr i16 %in0, 8
198  %In1Lo = and i16 %in1, 255
199  %In1Hi = lshr i16 %in1, 8
200  %m10 = mul i16 %In0Lo, %In1Hi
201  call void @use16(i16 %m10)
202  %m01 = mul i16 %In1Lo, %In0Hi
203  call void @use16(i16 %m01)
204  %m00 = mul i16 %In1Lo, %In0Lo
205  %addc = add i16 %m01, %m10
206  %shl = shl i16 %addc, 8
207  %retLo = add i16 %m00, %shl
208  ret i16 %retLo
209}
210
211; 4 tests that use In0Lo/in1 with different commutes
; A2/B0: %m10 uses masked In0Lo (and has an extra use, so it is kept),
; %m01 uses the full %in1; the rest folds to one mul.
212define i32 @mul32_low_A2_B0(i32 %in0, i32 %in1) {
213; CHECK-LABEL: @mul32_low_A2_B0(
214; CHECK-NEXT:    [[IN0LO:%.*]] = and i32 [[IN0:%.*]], 65535
215; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i32 [[IN1:%.*]], 16
216; CHECK-NEXT:    [[M10:%.*]] = mul nuw i32 [[IN1HI]], [[IN0LO]]
217; CHECK-NEXT:    call void @use32(i32 [[M10]])
218; CHECK-NEXT:    [[RETLO:%.*]] = mul i32 [[IN0]], [[IN1]]
219; CHECK-NEXT:    ret i32 [[RETLO]]
220;
221  %In0Lo = and i32 %in0, 65535
222  %In0Hi = lshr i32 %in0, 16
223  %In1Lo = and i32 %in1, 65535
224  %In1Hi = lshr i32 %in1, 16
225  %m10 = mul i32 %In1Hi, %In0Lo
226  call void @use32(i32 %m10)
227  %m01 = mul i32 %In0Hi, %in1
228  %m00 = mul i32 %In1Lo, %In0Lo
229  %addc = add i32 %m10, %m01
230  %shl = shl i32 %addc, 16
231  %retLo = add i32 %shl, %m00
232  ret i32 %retLo
233}
234
; A2/B1: like A2_B0 but %retLo is commuted to (m00, shl).
235define i32 @mul32_low_A2_B1(i32 %in0, i32 %in1) {
236; CHECK-LABEL: @mul32_low_A2_B1(
237; CHECK-NEXT:    [[IN0LO:%.*]] = and i32 [[IN0:%.*]], 65535
238; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i32 [[IN1:%.*]], 16
239; CHECK-NEXT:    [[M10:%.*]] = mul nuw i32 [[IN1HI]], [[IN0LO]]
240; CHECK-NEXT:    call void @use32(i32 [[M10]])
241; CHECK-NEXT:    [[RETLO:%.*]] = mul i32 [[IN0]], [[IN1]]
242; CHECK-NEXT:    ret i32 [[RETLO]]
243;
244  %In0Lo = and i32 %in0, 65535
245  %In0Hi = lshr i32 %in0, 16
246  %In1Lo = and i32 %in1, 65535
247  %In1Hi = lshr i32 %in1, 16
248  %m10 = mul i32 %In1Hi, %In0Lo
249  call void @use32(i32 %m10)
250  %m01 = mul i32 %In0Hi, %in1
251  %m00 = mul i32 %In1Lo, %In0Lo
252  %addc = add i32 %m10, %m01
253  %shl = shl i32 %addc, 16
254  %retLo = add i32 %m00, %shl
255  ret i32 %retLo
256}
257
; A2/B2: %m10/%m01 operand orders are swapped and %addc is commuted;
; %in1 goes through @use32 to thwart operand canonicalization.
258define i32 @mul32_low_A2_B2(i32 %in0, i32 %p) {
259; CHECK-LABEL: @mul32_low_A2_B2(
260; CHECK-NEXT:    [[IN1:%.*]] = call i32 @use32(i32 [[P:%.*]])
261; CHECK-NEXT:    [[IN0LO:%.*]] = and i32 [[IN0:%.*]], 65535
262; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i32 [[IN1]], 16
263; CHECK-NEXT:    [[M10:%.*]] = mul nuw i32 [[IN0LO]], [[IN1HI]]
264; CHECK-NEXT:    call void @use32(i32 [[M10]])
265; CHECK-NEXT:    [[RETLO:%.*]] = mul i32 [[IN0]], [[IN1]]
266; CHECK-NEXT:    ret i32 [[RETLO]]
267;
268  %in1 = call i32 @use32(i32 %p) ; thwart complexity-based canonicalization
269  %In0Lo = and i32 %in0, 65535
270  %In0Hi = lshr i32 %in0, 16
271  %In1Lo = and i32 %in1, 65535
272  %In1Hi = lshr i32 %in1, 16
273  %m10 = mul i32 %In0Lo, %In1Hi
274  call void @use32(i32 %m10)
275  %m01 = mul i32 %in1, %In0Hi
276  %m00 = mul i32 %In1Lo, %In0Lo
277  %addc = add i32 %m01, %m10
278  %shl = shl i32 %addc, 16
279  %retLo = add i32 %shl, %m00
280  ret i32 %retLo
281}
282
; A2/B3: %addc and %retLo are both commuted; %in1 goes through @use32.
283define i32 @mul32_low_A2_B3(i32 %in0, i32 %p) {
284; CHECK-LABEL: @mul32_low_A2_B3(
285; CHECK-NEXT:    [[IN1:%.*]] = call i32 @use32(i32 [[P:%.*]])
286; CHECK-NEXT:    [[IN0LO:%.*]] = and i32 [[IN0:%.*]], 65535
287; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i32 [[IN1]], 16
288; CHECK-NEXT:    [[M10:%.*]] = mul nuw i32 [[IN1HI]], [[IN0LO]]
289; CHECK-NEXT:    call void @use32(i32 [[M10]])
290; CHECK-NEXT:    [[RETLO:%.*]] = mul i32 [[IN0]], [[IN1]]
291; CHECK-NEXT:    ret i32 [[RETLO]]
292;
293  %in1 = call i32 @use32(i32 %p) ; thwart complexity-based canonicalization
294  %In0Lo = and i32 %in0, 65535
295  %In0Hi = lshr i32 %in0, 16
296  %In1Lo = and i32 %in1, 65535
297  %In1Hi = lshr i32 %in1, 16
298  %m10 = mul i32 %In1Hi, %In0Lo
299  call void @use32(i32 %m10)
300  %m01 = mul i32 %in1, %In0Hi
301  %m00 = mul i32 %In1Lo, %In0Lo
302  %addc = add i32 %m01, %m10
303  %shl = shl i32 %addc, 16
304  %retLo = add i32 %m00, %shl
305  ret i32 %retLo
306}
307
308; 4 tests that use in0/In1Lo with different commutes
; A3/B0: %m10 uses the full %in0, %m01 uses masked In1Lo (and has an extra
; use, so it is kept); the rest folds to one mul.
309define i64 @mul64_low_A3_B0(i64 %in0, i64 %in1) {
310; CHECK-LABEL: @mul64_low_A3_B0(
311; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i64 [[IN0:%.*]], 32
312; CHECK-NEXT:    [[IN1LO:%.*]] = and i64 [[IN1:%.*]], 4294967295
313; CHECK-NEXT:    [[M01:%.*]] = mul nuw i64 [[IN0HI]], [[IN1LO]]
314; CHECK-NEXT:    call void @use64(i64 [[M01]])
315; CHECK-NEXT:    [[RETLO:%.*]] = mul i64 [[IN0]], [[IN1]]
316; CHECK-NEXT:    ret i64 [[RETLO]]
317;
318  %In0Lo = and i64 %in0, 4294967295
319  %In0Hi = lshr i64 %in0, 32
320  %In1Lo = and i64 %in1, 4294967295
321  %In1Hi = lshr i64 %in1, 32
322  %m10 = mul i64 %In1Hi, %in0
323  %m01 = mul i64 %In0Hi, %In1Lo
324  call void @use64(i64 %m01)
325  %m00 = mul i64 %In1Lo, %In0Lo
326  %addc = add i64 %m10, %m01
327  %shl = shl i64 %addc, 32
328  %retLo = add i64 %shl, %m00
329  ret i64 %retLo
330}
331
; A3/B1: like A3_B0 but %retLo is commuted to (m00, shl).
332define i64 @mul64_low_A3_B1(i64 %in0, i64 %in1) {
333; CHECK-LABEL: @mul64_low_A3_B1(
334; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i64 [[IN0:%.*]], 32
335; CHECK-NEXT:    [[IN1LO:%.*]] = and i64 [[IN1:%.*]], 4294967295
336; CHECK-NEXT:    [[M01:%.*]] = mul nuw i64 [[IN0HI]], [[IN1LO]]
337; CHECK-NEXT:    call void @use64(i64 [[M01]])
338; CHECK-NEXT:    [[RETLO:%.*]] = mul i64 [[IN0]], [[IN1]]
339; CHECK-NEXT:    ret i64 [[RETLO]]
340;
341  %In0Lo = and i64 %in0, 4294967295
342  %In0Hi = lshr i64 %in0, 32
343  %In1Lo = and i64 %in1, 4294967295
344  %In1Hi = lshr i64 %in1, 32
345  %m10 = mul i64 %In1Hi, %in0
346  %m01 = mul i64 %In0Hi, %In1Lo
347  call void @use64(i64 %m01)
348  %m00 = mul i64 %In1Lo, %In0Lo
349  %addc = add i64 %m10, %m01
350  %shl = shl i64 %addc, 32
351  %retLo = add i64 %m00, %shl
352  ret i64 %retLo
353}
354
; A3/B2: %m10's operands are swapped and %addc is commuted; %in0 goes
; through @use64 to thwart operand canonicalization.
355define i64 @mul64_low_A3_B2(i64 %p, i64 %in1) {
356; CHECK-LABEL: @mul64_low_A3_B2(
357; CHECK-NEXT:    [[IN0:%.*]] = call i64 @use64(i64 [[P:%.*]])
358; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i64 [[IN0]], 32
359; CHECK-NEXT:    [[IN1LO:%.*]] = and i64 [[IN1:%.*]], 4294967295
360; CHECK-NEXT:    [[M01:%.*]] = mul nuw i64 [[IN0HI]], [[IN1LO]]
361; CHECK-NEXT:    call void @use64(i64 [[M01]])
362; CHECK-NEXT:    [[RETLO:%.*]] = mul i64 [[IN0]], [[IN1]]
363; CHECK-NEXT:    ret i64 [[RETLO]]
364;
365  %in0 = call i64 @use64(i64 %p) ; thwart complexity-based canonicalization
366  %In0Lo = and i64 %in0, 4294967295
367  %In0Hi = lshr i64 %in0, 32
368  %In1Lo = and i64 %in1, 4294967295
369  %In1Hi = lshr i64 %in1, 32
370  %m10 = mul i64 %in0, %In1Hi
371  %m01 = mul i64 %In0Hi, %In1Lo
372  call void @use64(i64 %m01)
373  %m00 = mul i64 %In1Lo, %In0Lo
374  %addc = add i64 %m01, %m10
375  %shl = shl i64 %addc, 32
376  %retLo = add i64 %shl, %m00
377  ret i64 %retLo
378}
379
; A3/B3: %addc and %retLo are both commuted; %in0 goes through @use64.
380define i64 @mul64_low_A3_B3(i64 %p, i64 %in1) {
381; CHECK-LABEL: @mul64_low_A3_B3(
382; CHECK-NEXT:    [[IN0:%.*]] = call i64 @use64(i64 [[P:%.*]])
383; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i64 [[IN0]], 32
384; CHECK-NEXT:    [[IN1LO:%.*]] = and i64 [[IN1:%.*]], 4294967295
385; CHECK-NEXT:    [[M01:%.*]] = mul nuw i64 [[IN1LO]], [[IN0HI]]
386; CHECK-NEXT:    call void @use64(i64 [[M01]])
387; CHECK-NEXT:    [[RETLO:%.*]] = mul i64 [[IN0]], [[IN1]]
388; CHECK-NEXT:    ret i64 [[RETLO]]
389;
390  %in0 = call i64 @use64(i64 %p) ; thwart complexity-based canonicalization
391  %In0Lo = and i64 %in0, 4294967295
392  %In0Hi = lshr i64 %in0, 32
393  %In1Lo = and i64 %in1, 4294967295
394  %In1Hi = lshr i64 %in1, 32
395  %m10 = mul i64 %in0, %In1Hi
396  %m01 = mul i64 %In1Lo, %In0Hi
397  call void @use64(i64 %m01)
398  %m00 = mul i64 %In1Lo, %In0Lo
399  %addc = add i64 %m01, %m10
400  %shl = shl i64 %addc, 32
401  %retLo = add i64 %m00, %shl
402  ret i64 %retLo
403}
404
; %addc has an extra use: the intermediate sum is kept alive, but the final
; result still folds to a single mul.
405define i32 @mul32_low_one_extra_user(i32 %in0, i32 %in1) {
406; CHECK-LABEL: @mul32_low_one_extra_user(
407; CHECK-NEXT:    [[IN0LO:%.*]] = and i32 [[IN0:%.*]], 65535
408; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i32 [[IN0]], 16
409; CHECK-NEXT:    [[IN1LO:%.*]] = and i32 [[IN1:%.*]], 65535
410; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i32 [[IN1]], 16
411; CHECK-NEXT:    [[M10:%.*]] = mul nuw i32 [[IN1HI]], [[IN0LO]]
412; CHECK-NEXT:    [[M01:%.*]] = mul nuw i32 [[IN1LO]], [[IN0HI]]
413; CHECK-NEXT:    [[ADDC:%.*]] = add i32 [[M10]], [[M01]]
414; CHECK-NEXT:    call void @use32(i32 [[ADDC]])
415; CHECK-NEXT:    [[RETLO:%.*]] = mul i32 [[IN0]], [[IN1]]
416; CHECK-NEXT:    ret i32 [[RETLO]]
417;
418  %In0Lo = and i32 %in0, 65535
419  %In0Hi = lshr i32 %in0, 16
420  %In1Lo = and i32 %in1, 65535
421  %In1Hi = lshr i32 %in1, 16
422  %m10 = mul i32 %In1Hi, %In0Lo
423  %m01 = mul i32 %In1Lo, %In0Hi
424  %m00 = mul i32 %In1Lo, %In0Lo
425  %addc = add i32 %m10, %m01
426  call void @use32(i32 %addc)
427  %shl = shl i32 %addc, 16
428  %retLo = add i32 %shl, %m00
429  ret i32 %retLo
430}
431
432; The following are variety types of target cases
433; https://alive2.llvm.org/ce/z/2BqKLt
; Plain i8 low-half long-multiply pattern, no extra uses; folds to one mul.
434define i8 @mul8_low(i8 %in0, i8 %in1) {
435; CHECK-LABEL: @mul8_low(
436; CHECK-NEXT:    [[RETLO:%.*]] = mul i8 [[IN0:%.*]], [[IN1:%.*]]
437; CHECK-NEXT:    ret i8 [[RETLO]]
438;
439  %In0Lo = and i8 %in0, 15
440  %In0Hi = lshr i8 %in0, 4
441  %In1Lo = and i8 %in1, 15
442  %In1Hi = lshr i8 %in1, 4
443  %m10 = mul i8 %In1Hi, %In0Lo
444  %m01 = mul i8 %In1Lo, %In0Hi
445  %m00 = mul i8 %In1Lo, %In0Lo
446  %addc = add i8 %m10, %m01
447  %shl = shl i8 %addc, 4
448  %retLo = add i8 %shl, %m00
449  ret i8 %retLo
450}
451
; Plain i16 low-half pattern, no extra uses; folds to one mul.
452define i16 @mul16_low(i16 %in0, i16 %in1) {
453; CHECK-LABEL: @mul16_low(
454; CHECK-NEXT:    [[RETLO:%.*]] = mul i16 [[IN0:%.*]], [[IN1:%.*]]
455; CHECK-NEXT:    ret i16 [[RETLO]]
456;
457  %In0Lo = and i16 %in0, 255
458  %In0Hi = lshr i16 %in0, 8
459  %In1Lo = and i16 %in1, 255
460  %In1Hi = lshr i16 %in1, 8
461  %m10 = mul i16 %In1Hi, %In0Lo
462  %m01 = mul i16 %In1Lo, %In0Hi
463  %m00 = mul i16 %In1Lo, %In0Lo
464  %addc = add i16 %m10, %m01
465  %shl = shl i16 %addc, 8
466  %retLo = add i16 %shl, %m00
467  ret i16 %retLo
468}
469
; Plain i32 low-half pattern, no extra uses; folds to one mul.
470define i32 @mul32_low(i32 %in0, i32 %in1) {
471; CHECK-LABEL: @mul32_low(
472; CHECK-NEXT:    [[RETLO:%.*]] = mul i32 [[IN0:%.*]], [[IN1:%.*]]
473; CHECK-NEXT:    ret i32 [[RETLO]]
474;
475  %In0Lo = and i32 %in0, 65535
476  %In0Hi = lshr i32 %in0, 16
477  %In1Lo = and i32 %in1, 65535
478  %In1Hi = lshr i32 %in1, 16
479  %m10 = mul i32 %In1Hi, %In0Lo
480  %m01 = mul i32 %In1Lo, %In0Hi
481  %m00 = mul i32 %In1Lo, %In0Lo
482  %addc = add i32 %m10, %m01
483  %shl = shl i32 %addc, 16
484  %retLo = add i32 %shl, %m00
485  ret i32 %retLo
486}
487
; Plain i64 low-half pattern, no extra uses; folds to one mul.
488define i64 @mul64_low(i64 %in0, i64 %in1) {
489; CHECK-LABEL: @mul64_low(
490; CHECK-NEXT:    [[RETLO:%.*]] = mul i64 [[IN0:%.*]], [[IN1:%.*]]
491; CHECK-NEXT:    ret i64 [[RETLO]]
492;
493  %In0Lo = and i64 %in0, 4294967295
494  %In0Hi = lshr i64 %in0, 32
495  %In1Lo = and i64 %in1, 4294967295
496  %In1Hi = lshr i64 %in1, 32
497  %m10 = mul i64 %In1Hi, %In0Lo
498  %m01 = mul i64 %In1Lo, %In0Hi
499  %m00 = mul i64 %In1Lo, %In0Lo
500  %addc = add i64 %m10, %m01
501  %shl = shl i64 %addc, 32
502  %retLo = add i64 %shl, %m00
503  ret i64 %retLo
504}
505
; Plain i128 low-half pattern (64-bit halves); folds to one mul.
506define i128 @mul128_low(i128 %in0, i128 %in1) {
507; CHECK-LABEL: @mul128_low(
508; CHECK-NEXT:    [[RETLO:%.*]] = mul i128 [[IN0:%.*]], [[IN1:%.*]]
509; CHECK-NEXT:    ret i128 [[RETLO]]
510;
511  %In0Lo = and i128 %in0, 18446744073709551615
512  %In0Hi = lshr i128 %in0, 64
513  %In1Lo = and i128 %in1, 18446744073709551615
514  %In1Hi = lshr i128 %in1, 64
515  %m10 = mul i128 %In1Hi, %In0Lo
516  %m01 = mul i128 %In1Lo, %In0Hi
517  %m00 = mul i128 %In1Lo, %In0Lo
518  %addc = add i128 %m10, %m01
519  %shl = shl i128 %addc, 64
520  %retLo = add i128 %shl, %m00
521  ret i128 %retLo
522}
523
524; Support vector type
; Vector (<2 x i8>) version of the pattern; also folds to a single mul.
525define <2 x i8> @mul_v2i8_low(<2 x i8> %in0, <2 x i8> %in1) {
526; CHECK-LABEL: @mul_v2i8_low(
527; CHECK-NEXT:    [[RETLO:%.*]] = mul <2 x i8> [[IN0:%.*]], [[IN1:%.*]]
528; CHECK-NEXT:    ret <2 x i8> [[RETLO]]
529;
530  %In0Lo = and <2 x i8> %in0, <i8 15, i8 15>
531  %In0Hi = lshr <2 x i8> %in0, <i8 4, i8 4>
532  %In1Lo = and <2 x i8> %in1, <i8 15, i8 15>
533  %In1Hi = lshr <2 x i8> %in1, <i8 4, i8 4>
534  %m10 = mul <2 x i8> %In1Hi, %In0Lo
535  %m01 = mul <2 x i8> %In1Lo, %In0Hi
536  %m00 = mul <2 x i8> %In1Lo, %In0Lo
537  %addc = add <2 x i8> %m10, %m01
538  %shl = shl <2 x i8> %addc, <i8 4, i8 4>
539  %retLo = add <2 x i8> %shl, %m00
540  ret <2 x i8> %retLo
541}
542
; Vector version with an extra use of %m01; that cross product is kept
; (with nuw inferred) and the rest still folds to one mul.
543define <2 x i8> @mul_v2i8_low_one_extra_user(<2 x i8> %in0, <2 x i8> %in1) {
544; CHECK-LABEL: @mul_v2i8_low_one_extra_user(
545; CHECK-NEXT:    [[IN0HI:%.*]] = lshr <2 x i8> [[IN0:%.*]], splat (i8 4)
546; CHECK-NEXT:    [[IN1LO:%.*]] = and <2 x i8> [[IN1:%.*]], splat (i8 15)
547; CHECK-NEXT:    [[M01:%.*]] = mul nuw <2 x i8> [[IN1LO]], [[IN0HI]]
548; CHECK-NEXT:    call void @use_v2i8(<2 x i8> [[M01]])
549; CHECK-NEXT:    [[RETLO:%.*]] = mul <2 x i8> [[IN0]], [[IN1]]
550; CHECK-NEXT:    ret <2 x i8> [[RETLO]]
551;
552  %In0Lo = and <2 x i8> %in0, <i8 15, i8 15>
553  %In0Hi = lshr <2 x i8> %in0, <i8 4, i8 4>
554  %In1Lo = and <2 x i8> %in1, <i8 15, i8 15>
555  %In1Hi = lshr <2 x i8> %in1, <i8 4, i8 4>
556  %m10 = mul <2 x i8> %In1Hi, %In0Lo
557  %m01 = mul <2 x i8> %In1Lo, %In0Hi
558  call void @use_v2i8(<2 x i8> %m01)
559  %m00 = mul <2 x i8> %In1Lo, %In0Lo
560  %addc = add <2 x i8> %m10, %m01
561  %shl = shl <2 x i8> %addc, <i8 4, i8 4>
562  %retLo = add <2 x i8> %shl, %m00
563  ret <2 x i8> %retLo
564}
565
566; Support wide width
; Wide non-power-of-two type (i130, even width, 65-bit halves); still folds.
567define i130 @mul130_low(i130 %in0, i130 %in1) {
568; CHECK-LABEL: @mul130_low(
569; CHECK-NEXT:    [[RETLO:%.*]] = mul i130 [[IN0:%.*]], [[IN1:%.*]]
570; CHECK-NEXT:    ret i130 [[RETLO]]
571;
572  %In0Lo = and i130 %in0, 36893488147419103231
573  %In0Hi = lshr i130 %in0, 65
574  %In1Lo = and i130 %in1, 36893488147419103231
575  %In1Hi = lshr i130 %in1, 65
576  %m10 = mul i130 %In1Hi, %In0Lo
577  %m01 = mul i130 %In1Lo, %In0Hi
578  %m00 = mul i130 %In1Lo, %In0Lo
579  %addc = add i130 %m10, %m01
580  %shl = shl i130 %addc, 65
581  %retLo = add i130 %shl, %m00
582  ret i130 %retLo
583}
584
; i130 version with an extra use of %m10; that cross product is kept
; (with nuw inferred) and the rest still folds to one mul.
585define i130 @mul130_low_one_extra_user(i130 %in0, i130 %in1) {
586; CHECK-LABEL: @mul130_low_one_extra_user(
587; CHECK-NEXT:    [[IN0LO:%.*]] = and i130 [[IN0:%.*]], 36893488147419103231
588; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i130 [[IN1:%.*]], 65
589; CHECK-NEXT:    [[M10:%.*]] = mul nuw i130 [[IN1HI]], [[IN0LO]]
590; CHECK-NEXT:    call void @use130(i130 [[M10]])
591; CHECK-NEXT:    [[RETLO:%.*]] = mul i130 [[IN0]], [[IN1]]
592; CHECK-NEXT:    ret i130 [[RETLO]]
593;
594  %In0Lo = and i130 %in0, 36893488147419103231
595  %In0Hi = lshr i130 %in0, 65
596  %In1Lo = and i130 %in1, 36893488147419103231
597  %In1Hi = lshr i130 %in1, 65
598  %m10 = mul i130 %In1Hi, %In0Lo
599  call void @use130(i130 %m10)
600  %m01 = mul i130 %In1Lo, %In0Hi
601  %m00 = mul i130 %In1Lo, %In0Lo
602  %addc = add i130 %m10, %m01
603  %shl = shl i130 %addc, 65
604  %retLo = add i130 %shl, %m00
605  ret i130 %retLo
606}
607
608; Negative case: Skip odd bitwidth type
; Not folded: i9 has an odd bit width, so it cannot be split into two equal
; halves; the CHECK lines show the pattern survives (only flags are inferred).
609define i9 @mul9_low(i9 %in0, i9 %in1) {
610; CHECK-LABEL: @mul9_low(
611; CHECK-NEXT:    [[IN0LO:%.*]] = and i9 [[IN0:%.*]], 15
612; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i9 [[IN0]], 4
613; CHECK-NEXT:    [[IN1LO:%.*]] = and i9 [[IN1:%.*]], 15
614; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i9 [[IN1]], 4
615; CHECK-NEXT:    [[M10:%.*]] = mul nuw i9 [[IN1HI]], [[IN0LO]]
616; CHECK-NEXT:    [[M01:%.*]] = mul nuw i9 [[IN1LO]], [[IN0HI]]
617; CHECK-NEXT:    [[M00:%.*]] = mul nuw nsw i9 [[IN1LO]], [[IN0LO]]
618; CHECK-NEXT:    [[ADDC:%.*]] = add i9 [[M10]], [[M01]]
619; CHECK-NEXT:    [[SHL:%.*]] = shl i9 [[ADDC]], 4
620; CHECK-NEXT:    [[RETLO:%.*]] = add i9 [[SHL]], [[M00]]
621; CHECK-NEXT:    ret i9 [[RETLO]]
622;
623  %In0Lo = and i9 %in0, 15
624  %In0Hi = lshr i9 %in0, 4
625  %In1Lo = and i9 %in1, 15
626  %In1Hi = lshr i9 %in1, 4
627  %m10 = mul i9 %In1Hi, %In0Lo
628  %m01 = mul i9 %In1Lo, %In0Hi
629  %m00 = mul i9 %In1Lo, %In0Lo
630  %addc = add i9 %m10, %m01
631  %shl = shl i9 %addc, 4
632  %retLo = add i9 %shl, %m00
633  ret i9 %retLo
634}
635
636; Negative test: Should not remove the "and", https://alive2.llvm.org/ce/z/JLmNU5
; Not folded: without the masking 'and's the products see full-width
; operands, so the low-half identity does not hold (see alive2 link above).
637define i64 @mul64_low_no_and(i64 %in0, i64 %in1) {
638; CHECK-LABEL: @mul64_low_no_and(
639; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i64 [[IN0:%.*]], 32
640; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i64 [[IN1:%.*]], 32
641; CHECK-NEXT:    [[M10:%.*]] = mul i64 [[IN1HI]], [[IN0]]
642; CHECK-NEXT:    [[M01:%.*]] = mul i64 [[IN1]], [[IN0HI]]
643; CHECK-NEXT:    [[M00:%.*]] = mul i64 [[IN1]], [[IN0]]
644; CHECK-NEXT:    [[ADDC:%.*]] = add i64 [[M10]], [[M01]]
645; CHECK-NEXT:    [[SHL:%.*]] = shl i64 [[ADDC]], 32
646; CHECK-NEXT:    [[RETLO:%.*]] = add i64 [[SHL]], [[M00]]
647; CHECK-NEXT:    ret i64 [[RETLO]]
648;
649  %In0Hi = lshr i64 %in0, 32
650  %In1Hi = lshr i64 %in1, 32
651  %m10 = mul i64 %In1Hi, %in0
652  %m01 = mul i64 %in1, %In0Hi
653  %m00 = mul i64 %in1, %in0
654  %addc = add i64 %m10, %m01
655  %shl = shl i64 %addc, 32
656  %retLo = add i64 %shl, %m00
657  ret i64 %retLo
658}
659
660; Negative test: the low-half mask does not match the shift amount
; Not folded: the low-half masks keep only 7 bits (127) while the shift
; amount is 8, so the pattern does not reconstruct a full low half.
661define i16 @mul16_low_miss_shift_amount(i16 %in0, i16 %in1) {
662; CHECK-LABEL: @mul16_low_miss_shift_amount(
663; CHECK-NEXT:    [[IN0LO:%.*]] = and i16 [[IN0:%.*]], 127
664; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i16 [[IN0]], 8
665; CHECK-NEXT:    [[IN1LO:%.*]] = and i16 [[IN1:%.*]], 127
666; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i16 [[IN1]], 8
667; CHECK-NEXT:    [[M10:%.*]] = mul nuw nsw i16 [[IN1HI]], [[IN0LO]]
668; CHECK-NEXT:    [[M01:%.*]] = mul nuw nsw i16 [[IN1LO]], [[IN0HI]]
669; CHECK-NEXT:    [[M00:%.*]] = mul nuw nsw i16 [[IN1LO]], [[IN0LO]]
670; CHECK-NEXT:    [[ADDC:%.*]] = add nuw i16 [[M10]], [[M01]]
671; CHECK-NEXT:    [[SHL:%.*]] = shl i16 [[ADDC]], 8
672; CHECK-NEXT:    [[RETLO:%.*]] = add i16 [[SHL]], [[M00]]
673; CHECK-NEXT:    ret i16 [[RETLO]]
674;
675  %In0Lo = and i16 %in0, 127 ; Should be 255
676  %In0Hi = lshr i16 %in0, 8
677  %In1Lo = and i16 %in1, 127
678  %In1Hi = lshr i16 %in1, 8
679  %m10 = mul i16 %In1Hi, %In0Lo
680  %m01 = mul i16 %In1Lo, %In0Hi
681  %m00 = mul i16 %In1Lo, %In0Lo
682  %addc = add i16 %m10, %m01
683  %shl = shl i16 %addc, 8
684  %retLo = add i16 %shl, %m00
685  ret i16 %retLo
686}
687
688; Negative test: the shift amount does not match half the bit width
; Not folded: the shift amount (3) is not half of the 8-bit width, so the
; high/low split is unbalanced and the identity does not hold.
689define i8 @mul8_low_miss_half_width(i8 %in0, i8 %in1) {
690; CHECK-LABEL: @mul8_low_miss_half_width(
691; CHECK-NEXT:    [[IN0LO:%.*]] = and i8 [[IN0:%.*]], 15
692; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i8 [[IN0]], 3
693; CHECK-NEXT:    [[IN1LO:%.*]] = and i8 [[IN1:%.*]], 15
694; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i8 [[IN1]], 3
695; CHECK-NEXT:    [[M10:%.*]] = mul i8 [[IN1HI]], [[IN0LO]]
696; CHECK-NEXT:    [[M01:%.*]] = mul i8 [[IN1LO]], [[IN0HI]]
697; CHECK-NEXT:    [[M00:%.*]] = mul nuw i8 [[IN1LO]], [[IN0LO]]
698; CHECK-NEXT:    [[ADDC:%.*]] = add i8 [[M10]], [[M01]]
699; CHECK-NEXT:    [[SHL:%.*]] = shl i8 [[ADDC]], 3
700; CHECK-NEXT:    [[RETLO:%.*]] = add i8 [[SHL]], [[M00]]
701; CHECK-NEXT:    ret i8 [[RETLO]]
702;
703  %In0Lo = and i8 %in0, 15
704  %In0Hi = lshr i8 %in0, 3 ; Should be 4
705  %In1Lo = and i8 %in1, 15
706  %In1Hi = lshr i8 %in1, 3
707  %m10 = mul i8 %In1Hi, %In0Lo
708  %m01 = mul i8 %In1Lo, %In0Hi
709  %m00 = mul i8 %In1Lo, %In0Lo
710  %addc = add i8 %m10, %m01
711  %shl = shl i8 %addc, 3
712  %retLo = add i8 %shl, %m00
713  ret i8 %retLo
714}
715
716; Test case to show shl doesn't need hasOneUse constraint
; %shl has an extra use but the fold needs no hasOneUse check on it: the shl
; (and its feeding mults) are kept for the extra user while the final add
; still becomes a single mul.
717define i32 @mul32_low_extra_shl_use(i32 %in0, i32 %in1) {
718; CHECK-LABEL: @mul32_low_extra_shl_use(
719; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i32 [[IN0:%.*]], 16
720; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i32 [[IN1:%.*]], 16
721; CHECK-NEXT:    [[M10:%.*]] = mul i32 [[IN1HI]], [[IN0]]
722; CHECK-NEXT:    [[M01:%.*]] = mul i32 [[IN1]], [[IN0HI]]
723; CHECK-NEXT:    [[ADDC:%.*]] = add i32 [[M10]], [[M01]]
724; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[ADDC]], 16
725; CHECK-NEXT:    call void @use32(i32 [[SHL]])
726; CHECK-NEXT:    [[RETLO:%.*]] = mul i32 [[IN0]], [[IN1]]
727; CHECK-NEXT:    ret i32 [[RETLO]]
728;
729  %In0Lo = and i32 %in0, 65535
730  %In0Hi = lshr i32 %in0, 16
731  %In1Lo = and i32 %in1, 65535
732  %In1Hi = lshr i32 %in1, 16
733  %m10 = mul i32 %In1Hi, %In0Lo
734  %m01 = mul i32 %In1Lo, %In0Hi
735  %m00 = mul i32 %In1Lo, %In0Lo
736  %addc = add i32 %m10, %m01
737  %shl = shl i32 %addc, 16
738  call void @use32(i32 %shl)
739  %retLo = add i32 %shl, %m00
740  ret i32 %retLo
741}
742