; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86

declare  i4  @llvm.umul.fix.i4   (i4,  i4, i32)
declare  i32 @llvm.umul.fix.i32  (i32, i32, i32)
declare  i64 @llvm.umul.fix.i64  (i64, i64, i32)
declare  <4 x i32> @llvm.umul.fix.v4i32(<4 x i32>, <4 x i32>, i32)
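; umul.fix multiplies its operands as unsigned fixed-point numbers that have
; the number of fractional bits given by the third (scale) operand: the full
; double-width product is shifted right by the scale and truncated back to the
; operand width.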

define i32 @func(i32 %x, i32 %y) nounwind {
; X64-LABEL: func:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    movl %edi, %ecx
; X64-NEXT:    imulq %rax, %rcx
; X64-NEXT:    movq %rcx, %rax
; X64-NEXT:    shrq $32, %rax
; X64-NEXT:    shldl $30, %ecx, %eax
; X64-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NEXT:    retq
;
; X86-LABEL: func:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    shrdl $2, %edx, %eax
; X86-NEXT:    retl
  %tmp = call i32 @llvm.umul.fix.i32(i32 %x, i32 %y, i32 2)
  ret i32 %tmp
}

define i64 @func2(i64 %x, i64 %y) nounwind {
; X64-LABEL: func2:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    mulq %rsi
; X64-NEXT:    shrdq $2, %rdx, %rax
; X64-NEXT:    retq
;
; X86-LABEL: func2:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %edi
; X86-NEXT:    movl %eax, %ebx
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    mull %ebp
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    addl %edx, %ebx
; X86-NEXT:    adcl $0, %edi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull %ebp
; X86-NEXT:    addl %ebx, %eax
; X86-NEXT:    adcl %edi, %edx
; X86-NEXT:    imull {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    addl %ecx, %edx
; X86-NEXT:    shldl $30, %eax, %edx
; X86-NEXT:    shldl $30, %esi, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 2)
  ret i64 %tmp
}

define i4 @func3(i4 %x, i4 %y) nounwind {
; X64-LABEL: func3:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andb $15, %al
; X64-NEXT:    andb $15, %sil
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    mulb %sil
; X64-NEXT:    shrb $2, %al
; X64-NEXT:    retq
;
; X86-LABEL: func3:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andb $15, %al
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andb $15, %cl
; X86-NEXT:    mulb %cl
; X86-NEXT:    shrb $2, %al
; X86-NEXT:    retl
  %tmp = call i4 @llvm.umul.fix.i4(i4 %x, i4 %y, i32 2)
  ret i4 %tmp
}

define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-LABEL: vec:
; X64:       # %bb.0:
; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X64-NEXT:    pmuludq %xmm1, %xmm0
; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,2,2,3]
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X64-NEXT:    pmuludq %xmm2, %xmm1
; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
; X64-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X64-NEXT:    psrld $2, %xmm3
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    pslld $30, %xmm0
; X64-NEXT:    por %xmm3, %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: vec:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %esi
; X86-NEXT:    shldl $30, %eax, %esi
; X86-NEXT:    movl %ebx, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %ebx
; X86-NEXT:    shldl $30, %eax, %ebx
; X86-NEXT:    movl %ebp, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %ebp
; X86-NEXT:    shldl $30, %eax, %ebp
; X86-NEXT:    movl %edi, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    shldl $30, %eax, %edx
; X86-NEXT:    movl %edx, 12(%ecx)
; X86-NEXT:    movl %ebp, 8(%ecx)
; X86-NEXT:    movl %ebx, 4(%ecx)
; X86-NEXT:    movl %esi, (%ecx)
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl $4
  %tmp = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> %x, <4 x i32> %y, i32 2)
  ret <4 x i32> %tmp
}

; The following tests use a scale of zero, so they result in regular integer multiplication.
define i32 @func4(i32 %x, i32 %y) nounwind {
; X64-LABEL: func4:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    imull %esi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: func4:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    imull {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
  %tmp = call i32 @llvm.umul.fix.i32(i32 %x, i32 %y, i32 0)
  ret i32 %tmp
}

define i64 @func5(i64 %x, i64 %y) nounwind {
; X64-LABEL: func5:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    imulq %rsi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: func5:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull %esi
; X86-NEXT:    imull {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    addl %ecx, %edx
; X86-NEXT:    imull {{[0-9]+}}(%esp), %esi
; X86-NEXT:    addl %esi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 0)
  ret i64 %tmp
}

define i4 @func6(i4 %x, i4 %y) nounwind {
; X64-LABEL: func6:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andb $15, %al
; X64-NEXT:    andb $15, %sil
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    mulb %sil
; X64-NEXT:    retq
;
; X86-LABEL: func6:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andb $15, %al
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andb $15, %cl
; X86-NEXT:    mulb %cl
; X86-NEXT:    retl
  %tmp = call i4 @llvm.umul.fix.i4(i4 %x, i4 %y, i32 0)
  ret i4 %tmp
}

define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-LABEL: vec2:
; X64:       # %bb.0:
; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X64-NEXT:    pmuludq %xmm1, %xmm0
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X64-NEXT:    pmuludq %xmm2, %xmm1
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    retq
;
; X86-LABEL: vec2:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    imull {{[0-9]+}}(%esp), %edi
; X86-NEXT:    imull {{[0-9]+}}(%esp), %esi
; X86-NEXT:    imull {{[0-9]+}}(%esp), %edx
; X86-NEXT:    imull {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    movl %edx, 8(%eax)
; X86-NEXT:    movl %esi, 4(%eax)
; X86-NEXT:    movl %edi, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl $4
  %tmp = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> %x, <4 x i32> %y, i32 0)
  ret <4 x i32> %tmp
}

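; Scale of 32 (half the bit width): the result is bits [95:32] of the full
; 128-bit product.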
define i64 @func7(i64 %x, i64 %y) nounwind {
; X64-LABEL: func7:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    mulq %rsi
; X64-NEXT:    shrdq $32, %rdx, %rax
; X64-NEXT:    retq
;
; X86-LABEL: func7:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    movl %ebx, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %esi
; X86-NEXT:    movl %eax, %edi
; X86-NEXT:    movl %ebx, %eax
; X86-NEXT:    mull %ebp
; X86-NEXT:    addl %edx, %edi
; X86-NEXT:    adcl $0, %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull %ebp
; X86-NEXT:    addl %edi, %eax
; X86-NEXT:    adcl %esi, %edx
; X86-NEXT:    imull {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    addl %ecx, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 32)
  ret i64 %tmp
}

define i64 @func8(i64 %x, i64 %y) nounwind {
; X64-LABEL: func8:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    mulq %rsi
; X64-NEXT:    shrdq $63, %rdx, %rax
; X64-NEXT:    retq
;
; X86-LABEL: func8:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    mull %ebp
; X86-NEXT:    movl %edx, %ecx
; X86-NEXT:    movl %eax, %edi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    mull %ebx
; X86-NEXT:    addl %edx, %edi
; X86-NEXT:    adcl $0, %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    mull %ebp
; X86-NEXT:    movl %edx, %esi
; X86-NEXT:    movl %eax, %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    mull %ebx
; X86-NEXT:    addl %edi, %eax
; X86-NEXT:    adcl %edx, %ecx
; X86-NEXT:    adcl $0, %esi
; X86-NEXT:    addl %ebp, %ecx
; X86-NEXT:    adcl $0, %esi
; X86-NEXT:    shldl $1, %ecx, %esi
; X86-NEXT:    shrdl $31, %ecx, %eax
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 63)
  ret i64 %tmp
}

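; Scale equal to the bit width: the result is the high 64 bits of the full
; 128-bit product.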
define i64 @func9(i64 %x, i64 %y) nounwind {
; X64-LABEL: func9:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    mulq %rsi
; X64-NEXT:    movq %rdx, %rax
; X64-NEXT:    retq
;
; X86-LABEL: func9:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %edi, %eax
; X86-NEXT:    mull %esi
; X86-NEXT:    movl %edx, %ecx
; X86-NEXT:    movl %eax, %ebp
; X86-NEXT:    movl %edi, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %ebx
; X86-NEXT:    addl %ebp, %ebx
; X86-NEXT:    adcl $0, %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    mull %esi
; X86-NEXT:    movl %edx, %edi
; X86-NEXT:    movl %eax, %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    addl %ebx, %eax
; X86-NEXT:    adcl %edx, %ecx
; X86-NEXT:    adcl $0, %edi
; X86-NEXT:    addl %ebp, %ecx
; X86-NEXT:    adcl $0, %edi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 64)
  ret i64 %tmp
}