xref: /llvm-project/llvm/test/Transforms/InstCombine/mul_full_64.ll (revision a105877646d68e48cdeeeadd9d1e075dc3c5d68d)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=instcombine -S | FileCheck %s
3
4target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
5target triple = "x86_64-unknown-linux-gnu"
6
; Full 64x64 -> 128-bit unsigned multiplication of %x and %y, written out by
; hand with 32-bit halves (schoolbook method). The result struct holds the low
; 64 bits of the product in element 0 and the high 64 bits in element 1.
;
; Expected output: the expansion is kept as written; the only difference from
; the input is that the final `or` gains the `disjoint` flag.
7define { i64, i64 } @mul_full_64_variant0(i64 %x, i64 %y) {
8; CHECK-LABEL: @mul_full_64_variant0(
9; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X:%.*]], 4294967295
10; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
11; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y:%.*]], 4294967295
12; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
13; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]]
14; CHECK-NEXT:    [[T1:%.*]] = mul nuw i64 [[YL]], [[XH]]
15; CHECK-NEXT:    [[T2:%.*]] = mul nuw i64 [[YH]], [[XL]]
16; CHECK-NEXT:    [[T3:%.*]] = mul nuw i64 [[YH]], [[XH]]
17; CHECK-NEXT:    [[T0L:%.*]] = and i64 [[T0]], 4294967295
18; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
19; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T0H]], [[T1]]
20; CHECK-NEXT:    [[U0L:%.*]] = and i64 [[U0]], 4294967295
21; CHECK-NEXT:    [[U0H:%.*]] = lshr i64 [[U0]], 32
22; CHECK-NEXT:    [[U1:%.*]] = add i64 [[U0L]], [[T2]]
23; CHECK-NEXT:    [[U1LS:%.*]] = shl i64 [[U1]], 32
24; CHECK-NEXT:    [[U1H:%.*]] = lshr i64 [[U1]], 32
25; CHECK-NEXT:    [[U2:%.*]] = add i64 [[U0H]], [[T3]]
26; CHECK-NEXT:    [[LO:%.*]] = or disjoint i64 [[U1LS]], [[T0L]]
27; CHECK-NEXT:    [[HI:%.*]] = add i64 [[U2]], [[U1H]]
28; CHECK-NEXT:    [[RES_LO:%.*]] = insertvalue { i64, i64 } undef, i64 [[LO]], 0
29; CHECK-NEXT:    [[RES:%.*]] = insertvalue { i64, i64 } [[RES_LO]], i64 [[HI]], 1
30; CHECK-NEXT:    ret { i64, i64 } [[RES]]
31;
  ; Split each operand into its low (mask 0xFFFFFFFF) and high (>> 32) half.
32  %xl = and i64 %x, 4294967295
33  %xh = lshr i64 %x, 32
34  %yl = and i64 %y, 4294967295
35  %yh = lshr i64 %y, 32
36
  ; Four partial products; each factor is below 2^32, so none can wrap (nuw).
37  %t0 = mul nuw i64 %yl, %xl
38  %t1 = mul nuw i64 %yl, %xh
39  %t2 = mul nuw i64 %yh, %xl
40  %t3 = mul nuw i64 %yh, %xh
41
42  %t0l = and i64 %t0, 4294967295
43  %t0h = lshr i64 %t0, 32
44
  ; Accumulate the middle terms, splitting after each add so the carries out
  ; of bit 32 (%u0h, %u1h) can be folded into the high result.
45  %u0 = add i64 %t0h, %t1
46  %u0l = and i64 %u0, 4294967295
47  %u0h = lshr i64 %u0, 32
48
49  %u1 = add i64 %u0l, %t2
50  %u1ls = shl i64 %u1, 32
51  %u1h = lshr i64 %u1, 32
52
53  %u2 = add i64 %u0h, %t3
54
  ; %u1ls has its low 32 bits clear and %t0l its high 32 bits clear, so the
  ; `or` acts as an add here.
55  %lo = or i64 %u1ls, %t0l
56  %hi = add i64 %u2, %u1h
57
58  %res_lo = insertvalue { i64, i64 } undef, i64 %lo, 0
59  %res = insertvalue { i64, i64 } %res_lo, i64 %hi, 1
60  ret { i64, i64 } %res
61}
62
63; The following variants 1 - 3 are generated with this C++ program:
64;
65; #include <stdint.h>
66;
67; uint64_t mulxu(uint64_t a, uint64_t b, uint64_t *rhi) {
68;     auto hi = [](uint64_t x) { return x >> 32; };
69;     auto lo = [](uint64_t x) { return uint32_t(x); };
70;     uint64_t xl = lo(a);
71;     uint64_t xh = hi(a);
72;     uint64_t yl = lo(b);
73;     uint64_t yh = hi(b);
74;
75;     uint64_t rhh = xh * yh;
76;     uint64_t rhl = xh * yl;
77;     uint64_t rlh = xl * yh;
78;     uint64_t rll = xl * yl;
79;
80;     *rhi = rhh + hi(rhl + hi(rll)) + hi((rlh + lo(rhl + hi(rll))));
81; #if ONE
82;     return a*b;
83; #elif TWO
84;     return (uint64_t(lo(rlh + lo(rhl + hi(rll)))) << 32) + lo(rll);
85; #elif THREE
86;     return ((rlh + rhl) << 32) + rll;
87; #endif
88; }
89
; Variant matching the `ONE` branch of the C++ program above: the high 64 bits
; of a*b are computed from 32-bit partial products and stored through %rhi,
; while the returned low 64 bits are a plain `mul i64 %b, %a`.
;
; Expected output: identical to the input instruction sequence.
90define i64 @mul_full_64_variant1(i64 %a, i64 %b, ptr nocapture %rhi) {
91; CHECK-LABEL: @mul_full_64_variant1(
92; CHECK-NEXT:    [[CONV:%.*]] = and i64 [[A:%.*]], 4294967295
93; CHECK-NEXT:    [[SHR_I43:%.*]] = lshr i64 [[A]], 32
94; CHECK-NEXT:    [[CONV3:%.*]] = and i64 [[B:%.*]], 4294967295
95; CHECK-NEXT:    [[SHR_I41:%.*]] = lshr i64 [[B]], 32
96; CHECK-NEXT:    [[MUL:%.*]] = mul nuw i64 [[SHR_I41]], [[SHR_I43]]
97; CHECK-NEXT:    [[MUL5:%.*]] = mul nuw i64 [[CONV3]], [[SHR_I43]]
98; CHECK-NEXT:    [[MUL6:%.*]] = mul nuw i64 [[SHR_I41]], [[CONV]]
99; CHECK-NEXT:    [[MUL7:%.*]] = mul nuw i64 [[CONV3]], [[CONV]]
100; CHECK-NEXT:    [[SHR_I40:%.*]] = lshr i64 [[MUL7]], 32
101; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[SHR_I40]], [[MUL5]]
102; CHECK-NEXT:    [[SHR_I39:%.*]] = lshr i64 [[ADD]], 32
103; CHECK-NEXT:    [[ADD10:%.*]] = add i64 [[SHR_I39]], [[MUL]]
104; CHECK-NEXT:    [[CONV14:%.*]] = and i64 [[ADD]], 4294967295
105; CHECK-NEXT:    [[ADD15:%.*]] = add i64 [[CONV14]], [[MUL6]]
106; CHECK-NEXT:    [[SHR_I:%.*]] = lshr i64 [[ADD15]], 32
107; CHECK-NEXT:    [[ADD17:%.*]] = add i64 [[ADD10]], [[SHR_I]]
108; CHECK-NEXT:    store i64 [[ADD17]], ptr [[RHI:%.*]], align 8
109; CHECK-NEXT:    [[MULLO:%.*]] = mul i64 [[B]], [[A]]
110; CHECK-NEXT:    ret i64 [[MULLO]]
111;
  ; Low/high 32-bit halves of %a and %b.
112  %conv = and i64 %a, 4294967295
113  %shr.i43 = lshr i64 %a, 32
114  %conv3 = and i64 %b, 4294967295
115  %shr.i41 = lshr i64 %b, 32
  ; Partial products: %mul = hi*hi, %mul5 and %mul6 = cross terms, %mul7 = lo*lo.
116  %mul = mul nuw i64 %shr.i41, %shr.i43
117  %mul5 = mul nuw i64 %conv3, %shr.i43
118  %mul6 = mul nuw i64 %shr.i41, %conv
119  %mul7 = mul nuw i64 %conv3, %conv
  ; High half: hi*hi plus the carries out of the two cross-term additions.
120  %shr.i40 = lshr i64 %mul7, 32
121  %add = add i64 %shr.i40, %mul5
122  %shr.i39 = lshr i64 %add, 32
123  %add10 = add i64 %shr.i39, %mul
124  %conv14 = and i64 %add, 4294967295
125  %add15 = add i64 %conv14, %mul6
126  %shr.i = lshr i64 %add15, 32
127  %add17 = add i64 %add10, %shr.i
128  store i64 %add17, ptr %rhi, align 8
  ; Low half taken directly from a 64-bit multiply.
129  %mullo = mul i64 %b, %a
130  ret i64 %mullo
131}
132
; Variant matching the `TWO` branch of the C++ program above: the high half is
; stored through %rhi as in variant1, and the returned low half is rebuilt from
; the partial sums as (%add15 << 32) | (%mul7 & 0xFFFFFFFF).
;
; Expected output: the sequence is kept; the only difference from the input is
; that the final `or` gains the `disjoint` flag.
133define i64 @mul_full_64_variant2(i64 %a, i64 %b, ptr nocapture %rhi) {
134; CHECK-LABEL: @mul_full_64_variant2(
135; CHECK-NEXT:    [[CONV:%.*]] = and i64 [[A:%.*]], 4294967295
136; CHECK-NEXT:    [[SHR_I58:%.*]] = lshr i64 [[A]], 32
137; CHECK-NEXT:    [[CONV3:%.*]] = and i64 [[B:%.*]], 4294967295
138; CHECK-NEXT:    [[SHR_I56:%.*]] = lshr i64 [[B]], 32
139; CHECK-NEXT:    [[MUL:%.*]] = mul nuw i64 [[SHR_I56]], [[SHR_I58]]
140; CHECK-NEXT:    [[MUL5:%.*]] = mul nuw i64 [[CONV3]], [[SHR_I58]]
141; CHECK-NEXT:    [[MUL6:%.*]] = mul nuw i64 [[SHR_I56]], [[CONV]]
142; CHECK-NEXT:    [[MUL7:%.*]] = mul nuw i64 [[CONV3]], [[CONV]]
143; CHECK-NEXT:    [[SHR_I55:%.*]] = lshr i64 [[MUL7]], 32
144; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[SHR_I55]], [[MUL5]]
145; CHECK-NEXT:    [[SHR_I54:%.*]] = lshr i64 [[ADD]], 32
146; CHECK-NEXT:    [[ADD10:%.*]] = add i64 [[SHR_I54]], [[MUL]]
147; CHECK-NEXT:    [[CONV14:%.*]] = and i64 [[ADD]], 4294967295
148; CHECK-NEXT:    [[ADD15:%.*]] = add i64 [[CONV14]], [[MUL6]]
149; CHECK-NEXT:    [[SHR_I51:%.*]] = lshr i64 [[ADD15]], 32
150; CHECK-NEXT:    [[ADD17:%.*]] = add i64 [[ADD10]], [[SHR_I51]]
151; CHECK-NEXT:    store i64 [[ADD17]], ptr [[RHI:%.*]], align 8
152; CHECK-NEXT:    [[CONV24:%.*]] = shl i64 [[ADD15]], 32
153; CHECK-NEXT:    [[CONV26:%.*]] = and i64 [[MUL7]], 4294967295
154; CHECK-NEXT:    [[ADD27:%.*]] = or disjoint i64 [[CONV24]], [[CONV26]]
155; CHECK-NEXT:    ret i64 [[ADD27]]
156;
  ; Low/high 32-bit halves of %a and %b.
157  %conv = and i64 %a, 4294967295
158  %shr.i58 = lshr i64 %a, 32
159  %conv3 = and i64 %b, 4294967295
160  %shr.i56 = lshr i64 %b, 32
  ; Partial products: %mul = hi*hi, %mul5 and %mul6 = cross terms, %mul7 = lo*lo.
161  %mul = mul nuw i64 %shr.i56, %shr.i58
162  %mul5 = mul nuw i64 %conv3, %shr.i58
163  %mul6 = mul nuw i64 %shr.i56, %conv
164  %mul7 = mul nuw i64 %conv3, %conv
  ; High half, stored through %rhi.
165  %shr.i55 = lshr i64 %mul7, 32
166  %add = add i64 %shr.i55, %mul5
167  %shr.i54 = lshr i64 %add, 32
168  %add10 = add i64 %shr.i54, %mul
169  %conv14 = and i64 %add, 4294967295
170  %add15 = add i64 %conv14, %mul6
171  %shr.i51 = lshr i64 %add15, 32
172  %add17 = add i64 %add10, %shr.i51
173  store i64 %add17, ptr %rhi, align 8
  ; Low half: the two operands of the `or` occupy disjoint bit ranges.
174  %conv24 = shl i64 %add15, 32
175  %conv26 = and i64 %mul7, 4294967295
176  %add27 = or i64 %conv24, %conv26
177  ret i64 %add27
178}
179
180; Negative test case for mul_fold function: MUL7 is used in more than one place
; Variant matching the `THREE` branch of the C++ program above: the high half
; is stored through %rhi, and the returned low half is computed as
; ((%mul6 + %mul5) << 32) + %mul7. %mul7 feeds both the high-half chain (via
; %shr.i42) and the final add.
;
; Expected output: identical to the input instruction sequence.
181define i64 @mul_full_64_variant3(i64 %a, i64 %b, ptr nocapture %rhi) {
182; CHECK-LABEL: @mul_full_64_variant3(
183; CHECK-NEXT:    [[CONV:%.*]] = and i64 [[A:%.*]], 4294967295
184; CHECK-NEXT:    [[SHR_I45:%.*]] = lshr i64 [[A]], 32
185; CHECK-NEXT:    [[CONV3:%.*]] = and i64 [[B:%.*]], 4294967295
186; CHECK-NEXT:    [[SHR_I43:%.*]] = lshr i64 [[B]], 32
187; CHECK-NEXT:    [[MUL:%.*]] = mul nuw i64 [[SHR_I43]], [[SHR_I45]]
188; CHECK-NEXT:    [[MUL5:%.*]] = mul nuw i64 [[CONV3]], [[SHR_I45]]
189; CHECK-NEXT:    [[MUL6:%.*]] = mul nuw i64 [[SHR_I43]], [[CONV]]
190; CHECK-NEXT:    [[MUL7:%.*]] = mul nuw i64 [[CONV3]], [[CONV]]
191; CHECK-NEXT:    [[SHR_I42:%.*]] = lshr i64 [[MUL7]], 32
192; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[SHR_I42]], [[MUL5]]
193; CHECK-NEXT:    [[SHR_I41:%.*]] = lshr i64 [[ADD]], 32
194; CHECK-NEXT:    [[ADD10:%.*]] = add i64 [[SHR_I41]], [[MUL]]
195; CHECK-NEXT:    [[CONV14:%.*]] = and i64 [[ADD]], 4294967295
196; CHECK-NEXT:    [[ADD15:%.*]] = add i64 [[CONV14]], [[MUL6]]
197; CHECK-NEXT:    [[SHR_I:%.*]] = lshr i64 [[ADD15]], 32
198; CHECK-NEXT:    [[ADD17:%.*]] = add i64 [[ADD10]], [[SHR_I]]
199; CHECK-NEXT:    store i64 [[ADD17]], ptr [[RHI:%.*]], align 8
200; CHECK-NEXT:    [[ADD18:%.*]] = add i64 [[MUL6]], [[MUL5]]
201; CHECK-NEXT:    [[SHL:%.*]] = shl i64 [[ADD18]], 32
202; CHECK-NEXT:    [[ADD19:%.*]] = add i64 [[SHL]], [[MUL7]]
203; CHECK-NEXT:    ret i64 [[ADD19]]
204;
  ; Low/high 32-bit halves of %a and %b.
205  %conv = and i64 %a, 4294967295
206  %shr.i45 = lshr i64 %a, 32
207  %conv3 = and i64 %b, 4294967295
208  %shr.i43 = lshr i64 %b, 32
  ; Partial products: %mul = hi*hi, %mul5 and %mul6 = cross terms, %mul7 = lo*lo.
209  %mul = mul nuw i64 %shr.i43, %shr.i45
210  %mul5 = mul nuw i64 %conv3, %shr.i45
211  %mul6 = mul nuw i64 %shr.i43, %conv
212  %mul7 = mul nuw i64 %conv3, %conv
  ; High half, stored through %rhi.
213  %shr.i42 = lshr i64 %mul7, 32
214  %add = add i64 %shr.i42, %mul5
215  %shr.i41 = lshr i64 %add, 32
216  %add10 = add i64 %shr.i41, %mul
217  %conv14 = and i64 %add, 4294967295
218  %add15 = add i64 %conv14, %mul6
219  %shr.i = lshr i64 %add15, 32
220  %add17 = add i64 %add10, %shr.i
221  store i64 %add17, ptr %rhi, align 8
  ; Low half: cross terms shifted up by 32, plus the lo*lo product.
222  %add18 = add i64 %mul6, %mul5
223  %shl = shl i64 %add18, 32
224  %add19 = add i64 %shl, %mul7
225  ret i64 %add19
226}
227
228
; 32-bit counterpart of the hand-expanded full multiply: %x and %y are split
; into 16-bit halves (mask 65535, shift 16) and the result struct holds the low
; 32 bits of the product in element 0 and the high 32 bits in element 1.
;
; Expected output: the expansion is kept as written; the only difference from
; the input is that the final `or` gains the `disjoint` flag.
229define { i32, i32 } @mul_full_32(i32 %x, i32 %y) {
230; CHECK-LABEL: @mul_full_32(
231; CHECK-NEXT:    [[XL:%.*]] = and i32 [[X:%.*]], 65535
232; CHECK-NEXT:    [[XH:%.*]] = lshr i32 [[X]], 16
233; CHECK-NEXT:    [[YL:%.*]] = and i32 [[Y:%.*]], 65535
234; CHECK-NEXT:    [[YH:%.*]] = lshr i32 [[Y]], 16
235; CHECK-NEXT:    [[T0:%.*]] = mul nuw i32 [[YL]], [[XL]]
236; CHECK-NEXT:    [[T1:%.*]] = mul nuw i32 [[YL]], [[XH]]
237; CHECK-NEXT:    [[T2:%.*]] = mul nuw i32 [[YH]], [[XL]]
238; CHECK-NEXT:    [[T3:%.*]] = mul nuw i32 [[YH]], [[XH]]
239; CHECK-NEXT:    [[T0L:%.*]] = and i32 [[T0]], 65535
240; CHECK-NEXT:    [[T0H:%.*]] = lshr i32 [[T0]], 16
241; CHECK-NEXT:    [[U0:%.*]] = add i32 [[T0H]], [[T1]]
242; CHECK-NEXT:    [[U0L:%.*]] = and i32 [[U0]], 65535
243; CHECK-NEXT:    [[U0H:%.*]] = lshr i32 [[U0]], 16
244; CHECK-NEXT:    [[U1:%.*]] = add i32 [[U0L]], [[T2]]
245; CHECK-NEXT:    [[U1LS:%.*]] = shl i32 [[U1]], 16
246; CHECK-NEXT:    [[U1H:%.*]] = lshr i32 [[U1]], 16
247; CHECK-NEXT:    [[U2:%.*]] = add i32 [[U0H]], [[T3]]
248; CHECK-NEXT:    [[LO:%.*]] = or disjoint i32 [[U1LS]], [[T0L]]
249; CHECK-NEXT:    [[HI:%.*]] = add i32 [[U2]], [[U1H]]
250; CHECK-NEXT:    [[RES_LO:%.*]] = insertvalue { i32, i32 } undef, i32 [[LO]], 0
251; CHECK-NEXT:    [[RES:%.*]] = insertvalue { i32, i32 } [[RES_LO]], i32 [[HI]], 1
252; CHECK-NEXT:    ret { i32, i32 } [[RES]]
253;
  ; Split each operand into its low and high 16-bit half.
254  %xl = and i32 %x, 65535
255  %xh = lshr i32 %x, 16
256  %yl = and i32 %y, 65535
257  %yh = lshr i32 %y, 16
258
  ; Four partial products of 16-bit values; none can wrap in i32 (nuw).
259  %t0 = mul nuw i32 %yl, %xl
260  %t1 = mul nuw i32 %yl, %xh
261  %t2 = mul nuw i32 %yh, %xl
262  %t3 = mul nuw i32 %yh, %xh
263
264  %t0l = and i32 %t0, 65535
265  %t0h = lshr i32 %t0, 16
266
  ; Middle-term accumulation with carries split off into %u0h and %u1h.
267  %u0 = add i32 %t0h, %t1
268  %u0l = and i32 %u0, 65535
269  %u0h = lshr i32 %u0, 16
270
271  %u1 = add i32 %u0l, %t2
272  %u1ls = shl i32 %u1, 16
273  %u1h = lshr i32 %u1, 16
274
275  %u2 = add i32 %u0h, %t3
276
277  %lo = or i32 %u1ls, %t0l
278  %hi = add i32 %u2, %u1h
279
280  %res_lo = insertvalue { i32, i32 } undef, i32 %lo, 0
281  %res = insertvalue { i32, i32 } %res_lo, i32 %hi, 1
282  ret { i32, i32 } %res
283}
284
285
286declare i64 @get_number()
287
288; In the following test cases %x and %y are instructions, not arguments.
289; This tests the placement of mul i128 and zexts.
290; Instructions are also shuffled.
291
; Same full-multiply expansion as @mul_full_64_variant0, but both operands are
; results of @get_number() calls (%1 supplies the y halves, %2 the x halves),
; the second call sits between the two half extractions, and the arithmetic
; instructions are listed in a shuffled order.
;
; Expected output: instruction order is preserved; the only difference from
; the input is that the final `or` gains the `disjoint` flag.
292define { i64, i64 } @mul_full_64_variant0_1() {
293; CHECK-LABEL: @mul_full_64_variant0_1(
294; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @get_number()
295; CHECK-NEXT:    [[YL:%.*]] = and i64 [[TMP1]], 4294967295
296; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[TMP1]], 32
297; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @get_number()
298; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[TMP2]], 32
299; CHECK-NEXT:    [[XL:%.*]] = and i64 [[TMP2]], 4294967295
300; CHECK-NEXT:    [[T1:%.*]] = mul nuw i64 [[YL]], [[XH]]
301; CHECK-NEXT:    [[T3:%.*]] = mul nuw i64 [[YH]], [[XH]]
302; CHECK-NEXT:    [[T2:%.*]] = mul nuw i64 [[YH]], [[XL]]
303; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]]
304; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
305; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T0H]], [[T1]]
306; CHECK-NEXT:    [[U0L:%.*]] = and i64 [[U0]], 4294967295
307; CHECK-NEXT:    [[U1:%.*]] = add i64 [[U0L]], [[T2]]
308; CHECK-NEXT:    [[U0H:%.*]] = lshr i64 [[U0]], 32
309; CHECK-NEXT:    [[U2:%.*]] = add i64 [[U0H]], [[T3]]
310; CHECK-NEXT:    [[U1H:%.*]] = lshr i64 [[U1]], 32
311; CHECK-NEXT:    [[HI:%.*]] = add i64 [[U2]], [[U1H]]
312; CHECK-NEXT:    [[U1LS:%.*]] = shl i64 [[U1]], 32
313; CHECK-NEXT:    [[T0L:%.*]] = and i64 [[T0]], 4294967295
314; CHECK-NEXT:    [[LO:%.*]] = or disjoint i64 [[U1LS]], [[T0L]]
315; CHECK-NEXT:    [[RES_LO:%.*]] = insertvalue { i64, i64 } undef, i64 [[LO]], 0
316; CHECK-NEXT:    [[RES:%.*]] = insertvalue { i64, i64 } [[RES_LO]], i64 [[HI]], 1
317; CHECK-NEXT:    ret { i64, i64 } [[RES]]
318;
  ; First operand and its halves.
319  %1 = call i64 @get_number()
320  %yl = and i64 %1, 4294967295
321  %yh = lshr i64 %1, 32
322
  ; Second operand and its halves, defined after the first operand's halves.
323  %2 = call i64 @get_number()
324  %xh = lshr i64 %2, 32
325  %xl = and i64 %2, 4294967295
326
327  %t1 = mul nuw i64 %yl, %xh
328  %t3 = mul nuw i64 %yh, %xh
329  %t2 = mul nuw i64 %yh, %xl
330  %t0 = mul nuw i64 %yl, %xl
331
  ; High half is completed before the low half is assembled.
332  %t0h = lshr i64 %t0, 32
333  %u0 = add i64 %t0h, %t1
334  %u0l = and i64 %u0, 4294967295
335  %u1 = add i64 %u0l, %t2
336  %u0h = lshr i64 %u0, 32
337  %u2 = add i64 %u0h, %t3
338  %u1h = lshr i64 %u1, 32
339  %hi = add i64 %u2, %u1h
340
341  %u1ls = shl i64 %u1, 32
342  %t0l = and i64 %t0, 4294967295
343  %lo = or i64 %u1ls, %t0l
344
345  %res_lo = insertvalue { i64, i64 } undef, i64 %lo, 0
346  %res = insertvalue { i64, i64 } %res_lo, i64 %hi, 1
347  ret { i64, i64 } %res
348}
349
; Another shuffled full-multiply expansion with call-produced operands: both
; @get_number() calls come first, and several multiplies/adds/the final `or`
; have their operands written in the opposite order from
; @mul_full_64_variant0 (e.g. `mul %xh, %yh`, `add %t1, %t0h`, `or %t0l, %u1ls`).
;
; Expected output: instruction and operand order are preserved; the only
; difference from the input is that the final `or` gains the `disjoint` flag.
350define { i64, i64 } @mul_full_64_variant0_2() {
351; CHECK-LABEL: @mul_full_64_variant0_2(
352; CHECK-NEXT:    [[X:%.*]] = call i64 @get_number()
353; CHECK-NEXT:    [[Y:%.*]] = call i64 @get_number()
354; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y]], 4294967295
355; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
356; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
357; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X]], 4294967295
358; CHECK-NEXT:    [[T3:%.*]] = mul nuw i64 [[XH]], [[YH]]
359; CHECK-NEXT:    [[T2:%.*]] = mul nuw i64 [[XL]], [[YH]]
360; CHECK-NEXT:    [[T1:%.*]] = mul nuw i64 [[XH]], [[YL]]
361; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[XL]], [[YL]]
362; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
363; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T1]], [[T0H]]
364; CHECK-NEXT:    [[U0L:%.*]] = and i64 [[U0]], 4294967295
365; CHECK-NEXT:    [[U1:%.*]] = add i64 [[T2]], [[U0L]]
366; CHECK-NEXT:    [[U0H:%.*]] = lshr i64 [[U0]], 32
367; CHECK-NEXT:    [[U2:%.*]] = add i64 [[U0H]], [[T3]]
368; CHECK-NEXT:    [[U1H:%.*]] = lshr i64 [[U1]], 32
369; CHECK-NEXT:    [[HI:%.*]] = add i64 [[U1H]], [[U2]]
370; CHECK-NEXT:    [[U1LS:%.*]] = shl i64 [[U1]], 32
371; CHECK-NEXT:    [[T0L:%.*]] = and i64 [[T0]], 4294967295
372; CHECK-NEXT:    [[LO:%.*]] = or disjoint i64 [[T0L]], [[U1LS]]
373; CHECK-NEXT:    [[RES_LO:%.*]] = insertvalue { i64, i64 } undef, i64 [[LO]], 0
374; CHECK-NEXT:    [[RES:%.*]] = insertvalue { i64, i64 } [[RES_LO]], i64 [[HI]], 1
375; CHECK-NEXT:    ret { i64, i64 } [[RES]]
376;
  ; Both operands are produced by calls before any half is extracted.
377  %x = call i64 @get_number()
378  %y = call i64 @get_number()
379
380  %yl = and i64 %y, 4294967295
381  %yh = lshr i64 %y, 32
382  %xh = lshr i64 %x, 32
383  %xl = and i64 %x, 4294967295
384
  ; Partial products, listed from hi*hi down to lo*lo.
385  %t3 = mul nuw i64 %xh, %yh
386  %t2 = mul nuw i64 %xl, %yh
387  %t1 = mul nuw i64 %xh, %yl
388  %t0 = mul nuw i64 %xl, %yl
389
390  %t0h = lshr i64 %t0, 32
391  %u0 = add i64 %t1, %t0h
392  %u0l = and i64 %u0, 4294967295
393  %u1 = add i64 %t2, %u0l
394  %u0h = lshr i64 %u0, 32
395  %u2 = add i64 %u0h, %t3
396  %u1h = lshr i64 %u1, 32
397  %hi = add i64 %u1h, %u2
398
399  %u1ls = shl i64 %u1, 32
400  %t0l = and i64 %t0, 4294967295
401  %lo = or i64 %t0l, %u1ls
402
403  %res_lo = insertvalue { i64, i64 } undef, i64 %lo, 0
404  %res = insertvalue { i64, i64 } %res_lo, i64 %hi, 1
405  ret { i64, i64 } %res
406}
407
408
; High-half-only version of the hand-expanded multiply: returns the upper 64
; bits of the unsigned 128-bit product of %x and %y. The low-half pieces
; (%t0l, %u1ls, %lo) of the full pattern are not computed here.
;
; Expected output: identical to the input instruction sequence.
409define i64 @umulh_64(i64 %x, i64 %y) {
410; CHECK-LABEL: @umulh_64(
411; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X:%.*]], 4294967295
412; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
413; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y:%.*]], 4294967295
414; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
415; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]]
416; CHECK-NEXT:    [[T1:%.*]] = mul nuw i64 [[YL]], [[XH]]
417; CHECK-NEXT:    [[T2:%.*]] = mul nuw i64 [[YH]], [[XL]]
418; CHECK-NEXT:    [[T3:%.*]] = mul nuw i64 [[YH]], [[XH]]
419; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
420; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T0H]], [[T1]]
421; CHECK-NEXT:    [[U0L:%.*]] = and i64 [[U0]], 4294967295
422; CHECK-NEXT:    [[U0H:%.*]] = lshr i64 [[U0]], 32
423; CHECK-NEXT:    [[U1:%.*]] = add i64 [[U0L]], [[T2]]
424; CHECK-NEXT:    [[U1H:%.*]] = lshr i64 [[U1]], 32
425; CHECK-NEXT:    [[U2:%.*]] = add i64 [[U0H]], [[T3]]
426; CHECK-NEXT:    [[HI:%.*]] = add i64 [[U2]], [[U1H]]
427; CHECK-NEXT:    ret i64 [[HI]]
428;
  ; Low/high 32-bit halves of both operands.
429  %xl = and i64 %x, 4294967295
430  %xh = lshr i64 %x, 32
431  %yl = and i64 %y, 4294967295
432  %yh = lshr i64 %y, 32
433
434  %t0 = mul nuw i64 %yl, %xl
435  %t1 = mul nuw i64 %yl, %xh
436  %t2 = mul nuw i64 %yh, %xl
437  %t3 = mul nuw i64 %yh, %xh
438
  ; Only the upper 32 bits of the lo*lo product contribute to the high half.
439  %t0h = lshr i64 %t0, 32
440
441  %u0 = add i64 %t0h, %t1
442  %u0l = and i64 %u0, 4294967295
443  %u0h = lshr i64 %u0, 32
444
445  %u1 = add i64 %u0l, %t2
446  %u1h = lshr i64 %u1, 32
447
448  %u2 = add i64 %u0h, %t3
449
  ; hi = hi*hi + carry from first cross term + carry from second cross term.
450  %hi = add i64 %u2, %u1h
451  ret i64 %hi
452}
453
454; TODO: https://alive2.llvm.org/ce/z/y26zaW
; Low-half-only version of the hand-expanded multiply: the returned value is
; (%u1 << 32) | (%t0 & 0xFFFFFFFF), built from the lo*lo product and the two
; cross terms. No hi*hi product is needed.
;
; Expected output (current behaviour): the expansion is only partially
; simplified - the cross-term multiplies use the unmasked %y / %x and lose
; `nuw`, the `and` producing %u0l disappears, and the `or` becomes
; `or disjoint`. It is not collapsed to a single multiply the way
; @mullo_variant3 is.
; NOTE(review): the TODO link presumably proves the single-`mul` form is
; valid - confirm before relying on it.
455define i64 @mullo(i64 %x, i64 %y) {
456; CHECK-LABEL: @mullo(
457; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X:%.*]], 4294967295
458; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
459; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y:%.*]], 4294967295
460; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
461; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]]
462; CHECK-NEXT:    [[T1:%.*]] = mul i64 [[Y]], [[XH]]
463; CHECK-NEXT:    [[T2:%.*]] = mul i64 [[YH]], [[X]]
464; CHECK-NEXT:    [[T0L:%.*]] = and i64 [[T0]], 4294967295
465; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
466; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T0H]], [[T1]]
467; CHECK-NEXT:    [[U1:%.*]] = add i64 [[U0]], [[T2]]
468; CHECK-NEXT:    [[U1LS:%.*]] = shl i64 [[U1]], 32
469; CHECK-NEXT:    [[LO:%.*]] = or disjoint i64 [[U1LS]], [[T0L]]
470; CHECK-NEXT:    ret i64 [[LO]]
471;
  ; Low/high 32-bit halves of both operands.
472  %xl = and i64 %x, 4294967295
473  %xh = lshr i64 %x, 32
474  %yl = and i64 %y, 4294967295
475  %yh = lshr i64 %y, 32
476
  ; lo*lo product and the two cross terms.
477  %t0 = mul nuw i64 %yl, %xl
478  %t1 = mul nuw i64 %yl, %xh
479  %t2 = mul nuw i64 %yh, %xl
480
481  %t0l = and i64 %t0, 4294967295
482  %t0h = lshr i64 %t0, 32
483
484  %u0 = add i64 %t0h, %t1
485  %u0l = and i64 %u0, 4294967295
486
  ; Only the low 32 bits of %u1 survive the shift by 32.
487  %u1 = add i64 %u0l, %t2
488  %u1ls = shl i64 %u1, 32
489
490  %lo = or i64 %u1ls, %t0l
491  ret i64 %lo
492}
493
494
; Low half of a*b written as ((bh*al + bl*ah) << 32) + bl*al, i.e. the two
; cross terms are summed, shifted up by 32 and added to the lo*lo product.
;
; Expected output: the whole sequence folds to a single `mul i64 %a, %b`.
495define i64 @mullo_variant3(i64 %a, i64 %b) {
496; CHECK-LABEL: @mullo_variant3(
497; CHECK-NEXT:    [[LO:%.*]] = mul i64 [[A:%.*]], [[B:%.*]]
498; CHECK-NEXT:    ret i64 [[LO]]
499;
  ; Low/high 32-bit halves of both operands.
500  %al = and i64 %a, 4294967295
501  %ah = lshr i64 %a, 32
502  %bl = and i64 %b, 4294967295
503  %bh = lshr i64 %b, 32
504
  ; lo*lo product and the two cross terms.
505  %t0 = mul nuw i64 %bl, %al
506  %t1 = mul nuw i64 %bl, %ah
507  %t2 = mul nuw i64 %bh, %al
508
509  %u1 = add i64 %t2, %t1
510  %u1ls = shl i64 %u1, 32
511
512  %lo = add i64 %u1ls, %t0
513  ret i64 %lo
514}
515
516
517declare void @eat_i64(i64)
518declare void @eat_i128(i128)
519
; Same low-half expansion as @mullo, but the function already contains a plain
; `mul i64 %x, %y` whose result is passed to @eat_i64 before the expansion.
;
; Expected output: the existing multiply and call are kept, and the expansion
; is simplified only partially (cross-term multiplies on unmasked %y / %x
; without `nuw`, no %u0l mask, `or disjoint`); the returned value is still the
; expanded form rather than [[DUPLICATED_MUL]].
520define i64 @mullo_duplicate(i64 %x, i64 %y) {
521; CHECK-LABEL: @mullo_duplicate(
522; CHECK-NEXT:    [[DUPLICATED_MUL:%.*]] = mul i64 [[X:%.*]], [[Y:%.*]]
523; CHECK-NEXT:    call void @eat_i64(i64 [[DUPLICATED_MUL]])
524; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X]], 4294967295
525; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
526; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y]], 4294967295
527; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
528; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]]
529; CHECK-NEXT:    [[T1:%.*]] = mul i64 [[Y]], [[XH]]
530; CHECK-NEXT:    [[T2:%.*]] = mul i64 [[YH]], [[X]]
531; CHECK-NEXT:    [[T0L:%.*]] = and i64 [[T0]], 4294967295
532; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
533; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T0H]], [[T1]]
534; CHECK-NEXT:    [[U1:%.*]] = add i64 [[U0]], [[T2]]
535; CHECK-NEXT:    [[U1LS:%.*]] = shl i64 [[U1]], 32
536; CHECK-NEXT:    [[LO:%.*]] = or disjoint i64 [[U1LS]], [[T0L]]
537; CHECK-NEXT:    ret i64 [[LO]]
538;
  ; Pre-existing direct multiply with an external use.
539  %duplicated_mul = mul i64 %x, %y
540  call void @eat_i64(i64 %duplicated_mul)
541
  ; Hand-expanded low half of the same product.
542  %xl = and i64 %x, 4294967295
543  %xh = lshr i64 %x, 32
544  %yl = and i64 %y, 4294967295
545  %yh = lshr i64 %y, 32
546
547  %t0 = mul nuw i64 %yl, %xl
548  %t1 = mul nuw i64 %yl, %xh
549  %t2 = mul nuw i64 %yh, %xl
550
551  %t0l = and i64 %t0, 4294967295
552  %t0h = lshr i64 %t0, 32
553
554  %u0 = add i64 %t0h, %t1
555  %u0l = and i64 %u0, 4294967295
556
557  %u1 = add i64 %u0l, %t2
558  %u1ls = shl i64 %u1, 32
559
560  %lo = or i64 %u1ls, %t0l
561  ret i64 %lo
562}
563
; Same full-multiply expansion as @mul_full_64_variant0, but the function
; already contains the equivalent wide multiply: %x and %y are zero-extended to
; i128, multiplied, and the i128 product is passed to @eat_i128.
;
; Expected output: the i128 multiply gains `nuw`, the final `or` gains
; `disjoint`, and the hand-written expansion is otherwise kept as written.
564define { i64, i64 } @mul_full_64_duplicate(i64 %x, i64 %y) {
565; CHECK-LABEL: @mul_full_64_duplicate(
566; CHECK-NEXT:    [[XX:%.*]] = zext i64 [[X:%.*]] to i128
567; CHECK-NEXT:    [[YY:%.*]] = zext i64 [[Y:%.*]] to i128
568; CHECK-NEXT:    [[DUPLICATED_MUL:%.*]] = mul nuw i128 [[XX]], [[YY]]
569; CHECK-NEXT:    call void @eat_i128(i128 [[DUPLICATED_MUL]])
570; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X]], 4294967295
571; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
572; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y]], 4294967295
573; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
574; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]]
575; CHECK-NEXT:    [[T1:%.*]] = mul nuw i64 [[YL]], [[XH]]
576; CHECK-NEXT:    [[T2:%.*]] = mul nuw i64 [[YH]], [[XL]]
577; CHECK-NEXT:    [[T3:%.*]] = mul nuw i64 [[YH]], [[XH]]
578; CHECK-NEXT:    [[T0L:%.*]] = and i64 [[T0]], 4294967295
579; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
580; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T0H]], [[T1]]
581; CHECK-NEXT:    [[U0L:%.*]] = and i64 [[U0]], 4294967295
582; CHECK-NEXT:    [[U0H:%.*]] = lshr i64 [[U0]], 32
583; CHECK-NEXT:    [[U1:%.*]] = add i64 [[U0L]], [[T2]]
584; CHECK-NEXT:    [[U1LS:%.*]] = shl i64 [[U1]], 32
585; CHECK-NEXT:    [[U1H:%.*]] = lshr i64 [[U1]], 32
586; CHECK-NEXT:    [[U2:%.*]] = add i64 [[U0H]], [[T3]]
587; CHECK-NEXT:    [[LO:%.*]] = or disjoint i64 [[U1LS]], [[T0L]]
588; CHECK-NEXT:    [[HI:%.*]] = add i64 [[U2]], [[U1H]]
589; CHECK-NEXT:    [[RES_LO:%.*]] = insertvalue { i64, i64 } undef, i64 [[LO]], 0
590; CHECK-NEXT:    [[RES:%.*]] = insertvalue { i64, i64 } [[RES_LO]], i64 [[HI]], 1
591; CHECK-NEXT:    ret { i64, i64 } [[RES]]
592;
  ; Pre-existing 128-bit multiply of the zero-extended operands, with an
  ; external use.
593  %xx = zext i64 %x to i128
594  %yy = zext i64 %y to i128
595  %duplicated_mul = mul i128 %xx, %yy
596  call void @eat_i128(i128 %duplicated_mul)
597
  ; Hand-expanded computation of the same product from 32-bit halves.
598  %xl = and i64 %x, 4294967295
599  %xh = lshr i64 %x, 32
600  %yl = and i64 %y, 4294967295
601  %yh = lshr i64 %y, 32
602
603  %t0 = mul nuw i64 %yl, %xl
604  %t1 = mul nuw i64 %yl, %xh
605  %t2 = mul nuw i64 %yh, %xl
606  %t3 = mul nuw i64 %yh, %xh
607
608  %t0l = and i64 %t0, 4294967295
609  %t0h = lshr i64 %t0, 32
610
611  %u0 = add i64 %t0h, %t1
612  %u0l = and i64 %u0, 4294967295
613  %u0h = lshr i64 %u0, 32
614
615  %u1 = add i64 %u0l, %t2
616  %u1ls = shl i64 %u1, 32
617  %u1h = lshr i64 %u1, 32
618
619  %u2 = add i64 %u0h, %t3
620
621  %lo = or i64 %u1ls, %t0l
622  %hi = add i64 %u2, %u1h
623
624  %res_lo = insertvalue { i64, i64 } undef, i64 %lo, 0
625  %res = insertvalue { i64, i64 } %res_lo, i64 %hi, 1
626  ret { i64, i64 } %res
627}
628
629
; High-half-only expansion (as in @umulh_64) with operands produced by two
; back-to-back @get_number() calls, and with the instruction and operand order
; of @mul_full_64_variant0_2.
;
; Expected output: identical to the input instruction sequence.
630define i64 @umulhi_64_v2() {
631; CHECK-LABEL: @umulhi_64_v2(
632; CHECK-NEXT:    [[X:%.*]] = call i64 @get_number()
633; CHECK-NEXT:    [[Y:%.*]] = call i64 @get_number()
634; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y]], 4294967295
635; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
636; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
637; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X]], 4294967295
638; CHECK-NEXT:    [[T3:%.*]] = mul nuw i64 [[XH]], [[YH]]
639; CHECK-NEXT:    [[T2:%.*]] = mul nuw i64 [[XL]], [[YH]]
640; CHECK-NEXT:    [[T1:%.*]] = mul nuw i64 [[XH]], [[YL]]
641; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[XL]], [[YL]]
642; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
643; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T1]], [[T0H]]
644; CHECK-NEXT:    [[U0L:%.*]] = and i64 [[U0]], 4294967295
645; CHECK-NEXT:    [[U1:%.*]] = add i64 [[T2]], [[U0L]]
646; CHECK-NEXT:    [[U0H:%.*]] = lshr i64 [[U0]], 32
647; CHECK-NEXT:    [[U2:%.*]] = add i64 [[U0H]], [[T3]]
648; CHECK-NEXT:    [[U1H:%.*]] = lshr i64 [[U1]], 32
649; CHECK-NEXT:    [[HI:%.*]] = add i64 [[U1H]], [[U2]]
650; CHECK-NEXT:    ret i64 [[HI]]
651;
  ; Both operands are produced by calls before any half is extracted.
652  %x = call i64 @get_number()
653  %y = call i64 @get_number()
654
655  %yl = and i64 %y, 4294967295
656  %yh = lshr i64 %y, 32
657  %xh = lshr i64 %x, 32
658  %xl = and i64 %x, 4294967295
659
660  %t3 = mul nuw i64 %xh, %yh
661  %t2 = mul nuw i64 %xl, %yh
662  %t1 = mul nuw i64 %xh, %yl
663  %t0 = mul nuw i64 %xl, %yl
664
  ; High half: hi*hi plus the carries out of the two cross-term additions.
665  %t0h = lshr i64 %t0, 32
666  %u0 = add i64 %t1, %t0h
667  %u0l = and i64 %u0, 4294967295
668  %u1 = add i64 %t2, %u0l
669  %u0h = lshr i64 %u0, 32
670  %u2 = add i64 %u0h, %t3
671  %u1h = lshr i64 %u1, 32
672  %hi = add i64 %u1h, %u2
673
674  ret i64 %hi
675}
676
677
; High-half-only expansion like @umulhi_64_v2, except that the halves of %x
; are extracted before the second @get_number() call that produces %y, so the
; two operand definitions are interleaved with the half extraction.
;
; Expected output: identical to the input instruction sequence.
678define i64 @umulhi_64_v3() {
679; CHECK-LABEL: @umulhi_64_v3(
680; CHECK-NEXT:    [[X:%.*]] = call i64 @get_number()
681; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
682; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X]], 4294967295
683; CHECK-NEXT:    [[Y:%.*]] = call i64 @get_number()
684; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y]], 4294967295
685; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
686; CHECK-NEXT:    [[T3:%.*]] = mul nuw i64 [[XH]], [[YH]]
687; CHECK-NEXT:    [[T2:%.*]] = mul nuw i64 [[XL]], [[YH]]
688; CHECK-NEXT:    [[T1:%.*]] = mul nuw i64 [[XH]], [[YL]]
689; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[XL]], [[YL]]
690; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
691; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T1]], [[T0H]]
692; CHECK-NEXT:    [[U0L:%.*]] = and i64 [[U0]], 4294967295
693; CHECK-NEXT:    [[U1:%.*]] = add i64 [[T2]], [[U0L]]
694; CHECK-NEXT:    [[U0H:%.*]] = lshr i64 [[U0]], 32
695; CHECK-NEXT:    [[U2:%.*]] = add i64 [[U0H]], [[T3]]
696; CHECK-NEXT:    [[U1H:%.*]] = lshr i64 [[U1]], 32
697; CHECK-NEXT:    [[HI:%.*]] = add i64 [[U1H]], [[U2]]
698; CHECK-NEXT:    ret i64 [[HI]]
699;
  ; First operand and its halves.
700  %x = call i64 @get_number()
701  %xh = lshr i64 %x, 32
702  %xl = and i64 %x, 4294967295
703
  ; Second operand is only produced after %x has been split.
704  %y = call i64 @get_number()
705  %yl = and i64 %y, 4294967295
706  %yh = lshr i64 %y, 32
707
708  %t3 = mul nuw i64 %xh, %yh
709  %t2 = mul nuw i64 %xl, %yh
710  %t1 = mul nuw i64 %xh, %yl
711  %t0 = mul nuw i64 %xl, %yl
712
  ; High half: hi*hi plus the carries out of the two cross-term additions.
713  %t0h = lshr i64 %t0, 32
714  %u0 = add i64 %t1, %t0h
715  %u0l = and i64 %u0, 4294967295
716  %u1 = add i64 %t2, %u0l
717  %u0h = lshr i64 %u0, 32
718  %u2 = add i64 %u0h, %t3
719  %u1h = lshr i64 %u1, 32
720  %hi = add i64 %u1h, %u2
721
722  ret i64 %hi
723}
724