xref: /llvm-project/llvm/test/CodeGen/X86/i128-mul.ll (revision e6bf48d11047e970cb24554a01b65b566d6b5d22)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86-NOBMI
3; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi2 | FileCheck %s --check-prefix=X86-BMI
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64-NOBMI
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2 | FileCheck %s --check-prefix=X64-BMI
6
7; PR1198
8
; foo: return the upper 64 bits of the unsigned 128-bit product of %x and %y.
; Both operands are zero-extended to i128, multiplied, shifted right by 64 and
; truncated back to i64. The x86-64 checks below expect a single widening
; multiply (mulq, or mulxq with +bmi2); the i686 checks expect the result to be
; assembled from four 32-bit multiplies with add/adc carry propagation.
9define i64 @foo(i64 %x, i64 %y) nounwind {
10; X86-NOBMI-LABEL: foo:
11; X86-NOBMI:       # %bb.0:
12; X86-NOBMI-NEXT:    pushl %ebp
13; X86-NOBMI-NEXT:    pushl %ebx
14; X86-NOBMI-NEXT:    pushl %edi
15; X86-NOBMI-NEXT:    pushl %esi
16; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
17; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
18; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
19; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
20; X86-NOBMI-NEXT:    movl %edi, %eax
21; X86-NOBMI-NEXT:    mull %esi
22; X86-NOBMI-NEXT:    movl %edx, %ebp
23; X86-NOBMI-NEXT:    movl %ebx, %eax
24; X86-NOBMI-NEXT:    mull %esi
25; X86-NOBMI-NEXT:    movl %edx, %esi
26; X86-NOBMI-NEXT:    movl %eax, %ebx
27; X86-NOBMI-NEXT:    addl %ebp, %ebx
28; X86-NOBMI-NEXT:    adcl $0, %esi
29; X86-NOBMI-NEXT:    movl %edi, %eax
30; X86-NOBMI-NEXT:    mull %ecx
31; X86-NOBMI-NEXT:    addl %ebx, %eax
32; X86-NOBMI-NEXT:    adcl %edx, %esi
33; X86-NOBMI-NEXT:    setb %al
34; X86-NOBMI-NEXT:    movzbl %al, %edi
35; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
36; X86-NOBMI-NEXT:    mull %ecx
37; X86-NOBMI-NEXT:    addl %esi, %eax
38; X86-NOBMI-NEXT:    adcl %edi, %edx
39; X86-NOBMI-NEXT:    popl %esi
40; X86-NOBMI-NEXT:    popl %edi
41; X86-NOBMI-NEXT:    popl %ebx
42; X86-NOBMI-NEXT:    popl %ebp
43; X86-NOBMI-NEXT:    retl
44;
45; X86-BMI-LABEL: foo:
46; X86-BMI:       # %bb.0:
47; X86-BMI-NEXT:    pushl %ebp
48; X86-BMI-NEXT:    pushl %ebx
49; X86-BMI-NEXT:    pushl %edi
50; X86-BMI-NEXT:    pushl %esi
51; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
52; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
53; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
54; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
55; X86-BMI-NEXT:    movl %ecx, %edx
56; X86-BMI-NEXT:    mulxl %edi, %ebx, %ebx
57; X86-BMI-NEXT:    movl %eax, %edx
58; X86-BMI-NEXT:    mulxl %edi, %edi, %ebp
59; X86-BMI-NEXT:    addl %ebx, %edi
60; X86-BMI-NEXT:    adcl $0, %ebp
61; X86-BMI-NEXT:    movl %ecx, %edx
62; X86-BMI-NEXT:    mulxl %esi, %ecx, %ebx
63; X86-BMI-NEXT:    addl %edi, %ecx
64; X86-BMI-NEXT:    adcl %ebp, %ebx
65; X86-BMI-NEXT:    setb %cl
66; X86-BMI-NEXT:    movzbl %cl, %ecx
67; X86-BMI-NEXT:    movl %eax, %edx
68; X86-BMI-NEXT:    mulxl %esi, %eax, %edx
69; X86-BMI-NEXT:    addl %ebx, %eax
70; X86-BMI-NEXT:    adcl %ecx, %edx
71; X86-BMI-NEXT:    popl %esi
72; X86-BMI-NEXT:    popl %edi
73; X86-BMI-NEXT:    popl %ebx
74; X86-BMI-NEXT:    popl %ebp
75; X86-BMI-NEXT:    retl
76;
77; X64-NOBMI-LABEL: foo:
78; X64-NOBMI:       # %bb.0:
79; X64-NOBMI-NEXT:    movq %rdi, %rax
80; X64-NOBMI-NEXT:    mulq %rsi
81; X64-NOBMI-NEXT:    movq %rdx, %rax
82; X64-NOBMI-NEXT:    retq
83;
84; X64-BMI-LABEL: foo:
85; X64-BMI:       # %bb.0:
86; X64-BMI-NEXT:    movq %rdi, %rdx
87; X64-BMI-NEXT:    mulxq %rsi, %rax, %rax
88; X64-BMI-NEXT:    retq
; Widen both operands so the multiply keeps the full 128-bit product.
89  %tmp0 = zext i64 %x to i128
90  %tmp1 = zext i64 %y to i128
91  %tmp2 = mul i128 %tmp0, %tmp1
; Shift amount of 64, written as a zext of an i32 constant.
92  %tmp7 = zext i32 64 to i128
; Bring the high half of the product down to the low 64 bits and return it.
93  %tmp3 = lshr i128 %tmp2, %tmp7
94  %tmp4 = trunc i128 %tmp3 to i64
95  ret i64 %tmp4
96}
97
98; <rdar://problem/14096009> superfluous multiply by high part of
99; zero-extended value.
100
; mul1: multiply the %n-element i64 array at %x by the scalar %y. Each
; iteration stores the low 64 bits of x[i] * y + carry to z[i] and keeps the
; high 64 bits as the carry for the next iteration. The loop is skipped when
; %n is 0, the final carry is discarded, and the function always returns 0.
; Both i128 operands of the multiply are zero-extended i64 values, and the
; x86-64 checks below expect exactly one mulq/mulxq inside the loop body.
101define i64 @mul1(i64 %n, ptr nocapture %z, ptr nocapture %x, i64 %y) nounwind {
102; X86-NOBMI-LABEL: mul1:
103; X86-NOBMI:       # %bb.0: # %entry
104; X86-NOBMI-NEXT:    pushl %ebp
105; X86-NOBMI-NEXT:    pushl %ebx
106; X86-NOBMI-NEXT:    pushl %edi
107; X86-NOBMI-NEXT:    pushl %esi
108; X86-NOBMI-NEXT:    subl $20, %esp
109; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
110; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
111; X86-NOBMI-NEXT:    orl %ecx, %eax
112; X86-NOBMI-NEXT:    je .LBB1_3
113; X86-NOBMI-NEXT:  # %bb.1: # %for.body.preheader
114; X86-NOBMI-NEXT:    xorl %eax, %eax
115; X86-NOBMI-NEXT:    xorl %edx, %edx
116; X86-NOBMI-NEXT:    xorl %ecx, %ecx
117; X86-NOBMI-NEXT:    movl $0, (%esp) # 4-byte Folded Spill
118; X86-NOBMI-NEXT:    .p2align 4
119; X86-NOBMI-NEXT:  .LBB1_2: # %for.body
120; X86-NOBMI-NEXT:    # =>This Inner Loop Header: Depth=1
121; X86-NOBMI-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
122; X86-NOBMI-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
123; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
124; X86-NOBMI-NEXT:    movl (%eax,%ecx,8), %edi
125; X86-NOBMI-NEXT:    movl 4(%eax,%ecx,8), %ebx
126; X86-NOBMI-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
127; X86-NOBMI-NEXT:    movl %edi, %eax
128; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
129; X86-NOBMI-NEXT:    mull %esi
130; X86-NOBMI-NEXT:    movl %edx, %ebp
131; X86-NOBMI-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
132; X86-NOBMI-NEXT:    movl %ebx, %eax
133; X86-NOBMI-NEXT:    mull %esi
134; X86-NOBMI-NEXT:    movl %edx, %ebx
135; X86-NOBMI-NEXT:    movl %eax, %esi
136; X86-NOBMI-NEXT:    addl %ebp, %esi
137; X86-NOBMI-NEXT:    adcl $0, %ebx
138; X86-NOBMI-NEXT:    movl %edi, %eax
139; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
140; X86-NOBMI-NEXT:    mull %edx
141; X86-NOBMI-NEXT:    movl %edx, %ebp
142; X86-NOBMI-NEXT:    movl %eax, %edi
143; X86-NOBMI-NEXT:    addl %esi, %edi
144; X86-NOBMI-NEXT:    adcl %ebx, %ebp
145; X86-NOBMI-NEXT:    setb %bl
146; X86-NOBMI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
147; X86-NOBMI-NEXT:    mull {{[0-9]+}}(%esp)
148; X86-NOBMI-NEXT:    addl %ebp, %eax
149; X86-NOBMI-NEXT:    movzbl %bl, %esi
150; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ebp
151; X86-NOBMI-NEXT:    adcl %esi, %edx
152; X86-NOBMI-NEXT:    movl %ecx, %ebx
153; X86-NOBMI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
154; X86-NOBMI-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
155; X86-NOBMI-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
156; X86-NOBMI-NEXT:    adcl $0, %eax
157; X86-NOBMI-NEXT:    adcl $0, %edx
158; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
159; X86-NOBMI-NEXT:    movl %ecx, (%esi,%ebx,8)
160; X86-NOBMI-NEXT:    movl %ebx, %ecx
161; X86-NOBMI-NEXT:    movl %edi, 4(%esi,%ebx,8)
162; X86-NOBMI-NEXT:    addl $1, %ecx
163; X86-NOBMI-NEXT:    movl (%esp), %edi # 4-byte Reload
164; X86-NOBMI-NEXT:    adcl $0, %edi
165; X86-NOBMI-NEXT:    movl %ecx, %esi
166; X86-NOBMI-NEXT:    xorl {{[0-9]+}}(%esp), %esi
167; X86-NOBMI-NEXT:    movl %edi, (%esp) # 4-byte Spill
168; X86-NOBMI-NEXT:    xorl %ebp, %edi
169; X86-NOBMI-NEXT:    orl %esi, %edi
170; X86-NOBMI-NEXT:    jne .LBB1_2
171; X86-NOBMI-NEXT:  .LBB1_3: # %for.end
172; X86-NOBMI-NEXT:    xorl %eax, %eax
173; X86-NOBMI-NEXT:    xorl %edx, %edx
174; X86-NOBMI-NEXT:    addl $20, %esp
175; X86-NOBMI-NEXT:    popl %esi
176; X86-NOBMI-NEXT:    popl %edi
177; X86-NOBMI-NEXT:    popl %ebx
178; X86-NOBMI-NEXT:    popl %ebp
179; X86-NOBMI-NEXT:    retl
180;
181; X86-BMI-LABEL: mul1:
182; X86-BMI:       # %bb.0: # %entry
183; X86-BMI-NEXT:    pushl %ebp
184; X86-BMI-NEXT:    pushl %ebx
185; X86-BMI-NEXT:    pushl %edi
186; X86-BMI-NEXT:    pushl %esi
187; X86-BMI-NEXT:    subl $20, %esp
188; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
189; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
190; X86-BMI-NEXT:    orl %ecx, %eax
191; X86-BMI-NEXT:    je .LBB1_3
192; X86-BMI-NEXT:  # %bb.1: # %for.body.preheader
193; X86-BMI-NEXT:    xorl %ecx, %ecx
194; X86-BMI-NEXT:    xorl %eax, %eax
195; X86-BMI-NEXT:    xorl %ebx, %ebx
196; X86-BMI-NEXT:    xorl %ebp, %ebp
197; X86-BMI-NEXT:    .p2align 4
198; X86-BMI-NEXT:  .LBB1_2: # %for.body
199; X86-BMI-NEXT:    # =>This Inner Loop Header: Depth=1
200; X86-BMI-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
201; X86-BMI-NEXT:    movl %eax, (%esp) # 4-byte Spill
202; X86-BMI-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
203; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
204; X86-BMI-NEXT:    movl (%eax,%ebx,8), %ecx
205; X86-BMI-NEXT:    movl 4(%eax,%ebx,8), %esi
206; X86-BMI-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
207; X86-BMI-NEXT:    movl %ecx, %edx
208; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
209; X86-BMI-NEXT:    mulxl %eax, %edx, %edi
210; X86-BMI-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
211; X86-BMI-NEXT:    movl %esi, %edx
212; X86-BMI-NEXT:    mulxl %eax, %esi, %eax
213; X86-BMI-NEXT:    addl %edi, %esi
214; X86-BMI-NEXT:    adcl $0, %eax
215; X86-BMI-NEXT:    movl %ecx, %edx
216; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
217; X86-BMI-NEXT:    mulxl %ecx, %edi, %ebp
218; X86-BMI-NEXT:    addl %esi, %edi
219; X86-BMI-NEXT:    adcl %eax, %ebp
220; X86-BMI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
221; X86-BMI-NEXT:    mulxl %ecx, %ecx, %eax
222; X86-BMI-NEXT:    setb %dl
223; X86-BMI-NEXT:    addl %ebp, %ecx
224; X86-BMI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
225; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
226; X86-BMI-NEXT:    movzbl %dl, %edx
227; X86-BMI-NEXT:    adcl %edx, %eax
228; X86-BMI-NEXT:    movl %eax, %edx
229; X86-BMI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
230; X86-BMI-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
231; X86-BMI-NEXT:    adcl (%esp), %edi # 4-byte Folded Reload
232; X86-BMI-NEXT:    adcl $0, %ecx
233; X86-BMI-NEXT:    adcl $0, %edx
234; X86-BMI-NEXT:    movl %edx, (%esp) # 4-byte Spill
235; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
236; X86-BMI-NEXT:    movl %eax, (%edx,%ebx,8)
237; X86-BMI-NEXT:    movl %edi, 4(%edx,%ebx,8)
238; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
239; X86-BMI-NEXT:    addl $1, %ebx
240; X86-BMI-NEXT:    adcl $0, %ebp
241; X86-BMI-NEXT:    movl %ebx, %edx
242; X86-BMI-NEXT:    xorl %esi, %edx
243; X86-BMI-NEXT:    movl %ebp, %esi
244; X86-BMI-NEXT:    xorl %edi, %esi
245; X86-BMI-NEXT:    orl %edx, %esi
246; X86-BMI-NEXT:    movl (%esp), %eax # 4-byte Reload
247; X86-BMI-NEXT:    jne .LBB1_2
248; X86-BMI-NEXT:  .LBB1_3: # %for.end
249; X86-BMI-NEXT:    xorl %eax, %eax
250; X86-BMI-NEXT:    xorl %edx, %edx
251; X86-BMI-NEXT:    addl $20, %esp
252; X86-BMI-NEXT:    popl %esi
253; X86-BMI-NEXT:    popl %edi
254; X86-BMI-NEXT:    popl %ebx
255; X86-BMI-NEXT:    popl %ebp
256; X86-BMI-NEXT:    retl
257;
258; X64-NOBMI-LABEL: mul1:
259; X64-NOBMI:       # %bb.0: # %entry
260; X64-NOBMI-NEXT:    testq %rdi, %rdi
261; X64-NOBMI-NEXT:    je .LBB1_3
262; X64-NOBMI-NEXT:  # %bb.1: # %for.body.preheader
263; X64-NOBMI-NEXT:    movq %rdx, %r8
264; X64-NOBMI-NEXT:    xorl %r10d, %r10d
265; X64-NOBMI-NEXT:    xorl %r9d, %r9d
266; X64-NOBMI-NEXT:    .p2align 4
267; X64-NOBMI-NEXT:  .LBB1_2: # %for.body
268; X64-NOBMI-NEXT:    # =>This Inner Loop Header: Depth=1
269; X64-NOBMI-NEXT:    movq %rcx, %rax
270; X64-NOBMI-NEXT:    mulq (%r8,%r9,8)
271; X64-NOBMI-NEXT:    addq %r10, %rax
272; X64-NOBMI-NEXT:    adcq $0, %rdx
273; X64-NOBMI-NEXT:    movq %rax, (%rsi,%r9,8)
274; X64-NOBMI-NEXT:    incq %r9
275; X64-NOBMI-NEXT:    cmpq %r9, %rdi
276; X64-NOBMI-NEXT:    movq %rdx, %r10
277; X64-NOBMI-NEXT:    jne .LBB1_2
278; X64-NOBMI-NEXT:  .LBB1_3: # %for.end
279; X64-NOBMI-NEXT:    xorl %eax, %eax
280; X64-NOBMI-NEXT:    retq
281;
282; X64-BMI-LABEL: mul1:
283; X64-BMI:       # %bb.0: # %entry
284; X64-BMI-NEXT:    testq %rdi, %rdi
285; X64-BMI-NEXT:    je .LBB1_3
286; X64-BMI-NEXT:  # %bb.1: # %for.body.preheader
287; X64-BMI-NEXT:    movq %rdx, %rax
288; X64-BMI-NEXT:    xorl %r9d, %r9d
289; X64-BMI-NEXT:    xorl %r8d, %r8d
290; X64-BMI-NEXT:    .p2align 4
291; X64-BMI-NEXT:  .LBB1_2: # %for.body
292; X64-BMI-NEXT:    # =>This Inner Loop Header: Depth=1
293; X64-BMI-NEXT:    movq %rcx, %rdx
294; X64-BMI-NEXT:    mulxq (%rax,%r8,8), %r10, %rdx
295; X64-BMI-NEXT:    addq %r9, %r10
296; X64-BMI-NEXT:    adcq $0, %rdx
297; X64-BMI-NEXT:    movq %r10, (%rsi,%r8,8)
298; X64-BMI-NEXT:    incq %r8
299; X64-BMI-NEXT:    cmpq %r8, %rdi
300; X64-BMI-NEXT:    movq %rdx, %r9
301; X64-BMI-NEXT:    jne .LBB1_2
302; X64-BMI-NEXT:  .LBB1_3: # %for.end
303; X64-BMI-NEXT:    xorl %eax, %eax
304; X64-BMI-NEXT:    retq
305entry:
; The scalar multiplier is widened once, outside the loop.
306  %conv = zext i64 %y to i128
; An element count of zero bypasses the loop entirely.
307  %cmp11 = icmp eq i64 %n, 0
308  br i1 %cmp11, label %for.end, label %for.body
309
310for.body:                                         ; preds = %entry, %for.body
; %carry.013 is the high half of the previous iteration's sum (0 on entry);
; %i.012 is the current element index.
311  %carry.013 = phi i64 [ %conv6, %for.body ], [ 0, %entry ]
312  %i.012 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
; Load x[i] and form the 128-bit value x[i] * y + carry.
313  %arrayidx = getelementptr inbounds i64, ptr %x, i64 %i.012
314  %0 = load i64, ptr %arrayidx, align 8
315  %conv2 = zext i64 %0 to i128
316  %mul = mul i128 %conv2, %conv
317  %conv3 = zext i64 %carry.013 to i128
318  %add = add i128 %mul, %conv3
; The low 64 bits of the sum are stored to z[i].
319  %conv4 = trunc i128 %add to i64
320  %arrayidx5 = getelementptr inbounds i64, ptr %z, i64 %i.012
321  store i64 %conv4, ptr %arrayidx5, align 8
; The high 64 bits of the sum become the carry for the next iteration.
322  %shr = lshr i128 %add, 64
323  %conv6 = trunc i128 %shr to i64
; Advance the index and keep looping until it equals %n.
324  %inc = add i64 %i.012, 1
325  %exitcond = icmp eq i64 %inc, %n
326  br i1 %exitcond, label %for.end, label %for.body
327
328for.end:                                          ; preds = %for.body, %entry
; The last carry is not returned; the result is the constant 0.
329  ret i64 0
330}
331