xref: /llvm-project/llvm/test/CodeGen/X86/smul_fix_sat.ll (revision e30a4fc3e20bf5d9cc2f5bfcb61b4eb0e686a193)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
3; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86
4
5declare  i4  @llvm.smul.fix.sat.i4   (i4,  i4, i32)
6declare  i32 @llvm.smul.fix.sat.i32  (i32, i32, i32)
7declare  i64 @llvm.smul.fix.sat.i64  (i64, i64, i32)
8declare  <4 x i32> @llvm.smul.fix.sat.v4i32(<4 x i32>, <4 x i32>, i32)
9
10define i32 @func(i32 %x, i32 %y) nounwind {
11; X64-LABEL: func:
12; X64:       # %bb.0:
13; X64-NEXT:    movslq %esi, %rax
14; X64-NEXT:    movslq %edi, %rcx
15; X64-NEXT:    imulq %rax, %rcx
16; X64-NEXT:    movq %rcx, %rax
17; X64-NEXT:    shrq $32, %rax
18; X64-NEXT:    shrdl $2, %eax, %ecx
19; X64-NEXT:    cmpl $2, %eax
20; X64-NEXT:    movl $2147483647, %edx # imm = 0x7FFFFFFF
21; X64-NEXT:    cmovll %ecx, %edx
22; X64-NEXT:    cmpl $-2, %eax
23; X64-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
24; X64-NEXT:    cmovgel %edx, %eax
25; X64-NEXT:    retq
26;
27; X86-LABEL: func:
28; X86:       # %bb.0:
29; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
30; X86-NEXT:    imull {{[0-9]+}}(%esp)
31; X86-NEXT:    shrdl $2, %edx, %eax
32; X86-NEXT:    cmpl $2, %edx
33; X86-NEXT:    movl $2147483647, %ecx # imm = 0x7FFFFFFF
34; X86-NEXT:    cmovgel %ecx, %eax
35; X86-NEXT:    cmpl $-2, %edx
36; X86-NEXT:    movl $-2147483648, %ecx # imm = 0x80000000
37; X86-NEXT:    cmovll %ecx, %eax
38; X86-NEXT:    retl
; Scale = 2: the widened (64-bit) signed product is shifted right by 2 and
; clamped to [INT32_MIN, INT32_MAX] when the high bits indicate overflow.
39  %tmp = call i32 @llvm.smul.fix.sat.i32(i32 %x, i32 %y, i32 2)
40  ret i32 %tmp
41}
42
43define i64 @func2(i64 %x, i64 %y) nounwind {
44; X64-LABEL: func2:
45; X64:       # %bb.0:
46; X64-NEXT:    movq %rdi, %rax
47; X64-NEXT:    imulq %rsi
48; X64-NEXT:    shrdq $2, %rdx, %rax
49; X64-NEXT:    cmpq $2, %rdx
50; X64-NEXT:    movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
51; X64-NEXT:    cmovgeq %rcx, %rax
52; X64-NEXT:    cmpq $-2, %rdx
53; X64-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
54; X64-NEXT:    cmovlq %rcx, %rax
55; X64-NEXT:    retq
56;
57; X86-LABEL: func2:
58; X86:       # %bb.0:
59; X86-NEXT:    pushl %ebp
60; X86-NEXT:    pushl %ebx
61; X86-NEXT:    pushl %edi
62; X86-NEXT:    pushl %esi
63; X86-NEXT:    subl $8, %esp
64; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
65; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
66; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
67; X86-NEXT:    movl %ecx, %eax
68; X86-NEXT:    mull %esi
69; X86-NEXT:    movl %edx, %edi
70; X86-NEXT:    movl %eax, %ebp
71; X86-NEXT:    movl %ecx, %eax
72; X86-NEXT:    mull %ebx
73; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
74; X86-NEXT:    addl %edx, %ebp
75; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
76; X86-NEXT:    adcl $0, %edi
77; X86-NEXT:    movl %ecx, %eax
78; X86-NEXT:    imull %esi
79; X86-NEXT:    movl %edx, %esi
80; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
81; X86-NEXT:    movl %ecx, %eax
82; X86-NEXT:    mull %ebx
83; X86-NEXT:    movl %eax, %ebx
84; X86-NEXT:    addl %ebp, %ebx
85; X86-NEXT:    adcl %edi, %edx
86; X86-NEXT:    adcl $0, %esi
87; X86-NEXT:    addl (%esp), %edx # 4-byte Folded Reload
88; X86-NEXT:    adcl $0, %esi
89; X86-NEXT:    movl %edx, %edi
90; X86-NEXT:    subl {{[0-9]+}}(%esp), %edi
91; X86-NEXT:    movl %esi, %ebp
92; X86-NEXT:    sbbl $0, %ebp
93; X86-NEXT:    testl %ecx, %ecx
94; X86-NEXT:    cmovnsl %esi, %ebp
95; X86-NEXT:    cmovnsl %edx, %edi
96; X86-NEXT:    movl %edi, %ecx
97; X86-NEXT:    subl {{[0-9]+}}(%esp), %ecx
98; X86-NEXT:    movl %ebp, %edx
99; X86-NEXT:    sbbl $0, %edx
100; X86-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
101; X86-NEXT:    cmovnsl %ebp, %edx
102; X86-NEXT:    cmovnsl %edi, %ecx
103; X86-NEXT:    testl %edx, %edx
104; X86-NEXT:    setg %ah
105; X86-NEXT:    sete (%esp) # 1-byte Folded Spill
106; X86-NEXT:    cmpl $2, %ecx
107; X86-NEXT:    setae %al
108; X86-NEXT:    andb (%esp), %al # 1-byte Folded Reload
109; X86-NEXT:    orb %ah, %al
110; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
111; X86-NEXT:    shrdl $2, %ebx, %ebp
112; X86-NEXT:    shrdl $2, %ecx, %ebx
113; X86-NEXT:    testb %al, %al
114; X86-NEXT:    movl $2147483647, %esi # imm = 0x7FFFFFFF
115; X86-NEXT:    cmovel %ebx, %esi
116; X86-NEXT:    movl $-1, %edi
117; X86-NEXT:    cmovel %ebp, %edi
118; X86-NEXT:    cmpl $-1, %edx
119; X86-NEXT:    setl %dl
120; X86-NEXT:    sete %al
121; X86-NEXT:    cmpl $-2, %ecx
122; X86-NEXT:    setb %cl
123; X86-NEXT:    andb %al, %cl
124; X86-NEXT:    xorl %eax, %eax
125; X86-NEXT:    orb %dl, %cl
126; X86-NEXT:    cmovel %edi, %eax
127; X86-NEXT:    movl $-2147483648, %edx # imm = 0x80000000
128; X86-NEXT:    cmovel %esi, %edx
129; X86-NEXT:    addl $8, %esp
130; X86-NEXT:    popl %esi
131; X86-NEXT:    popl %edi
132; X86-NEXT:    popl %ebx
133; X86-NEXT:    popl %ebp
134; X86-NEXT:    retl
; i64 with scale = 2. On x86-64 this maps onto a single widening imulq;
; on i686 the 64x64 multiply is expanded into partial-product mull/imull
; sequences before the shift-by-2 and saturation clamps.
135  %tmp = call i64 @llvm.smul.fix.sat.i64(i64 %x, i64 %y, i32 2)
136  ret i64 %tmp
137}
138
139define i4 @func3(i4 %x, i4 %y) nounwind {
140; X64-LABEL: func3:
141; X64:       # %bb.0:
142; X64-NEXT:    shlb $4, %sil
143; X64-NEXT:    sarb $4, %sil
144; X64-NEXT:    shlb $4, %dil
145; X64-NEXT:    movsbl %dil, %eax
146; X64-NEXT:    movsbl %sil, %ecx
147; X64-NEXT:    imull %eax, %ecx
148; X64-NEXT:    movl %ecx, %eax
149; X64-NEXT:    shrb $2, %al
150; X64-NEXT:    shrl $8, %ecx
151; X64-NEXT:    movl %ecx, %edx
152; X64-NEXT:    shlb $6, %dl
153; X64-NEXT:    orb %al, %dl
154; X64-NEXT:    movzbl %dl, %eax
155; X64-NEXT:    cmpb $2, %cl
156; X64-NEXT:    movl $127, %edx
157; X64-NEXT:    cmovll %eax, %edx
158; X64-NEXT:    cmpb $-2, %cl
159; X64-NEXT:    movl $128, %eax
160; X64-NEXT:    cmovgel %edx, %eax
161; X64-NEXT:    sarb $4, %al
162; X64-NEXT:    # kill: def $al killed $al killed $eax
163; X64-NEXT:    retq
164;
165; X86-LABEL: func3:
166; X86:       # %bb.0:
167; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
168; X86-NEXT:    shlb $4, %al
169; X86-NEXT:    sarb $4, %al
170; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
171; X86-NEXT:    shlb $4, %cl
172; X86-NEXT:    movsbl %cl, %ecx
173; X86-NEXT:    movsbl %al, %eax
174; X86-NEXT:    imull %ecx, %eax
175; X86-NEXT:    movb %ah, %cl
176; X86-NEXT:    shlb $6, %cl
177; X86-NEXT:    shrb $2, %al
178; X86-NEXT:    orb %cl, %al
179; X86-NEXT:    movzbl %al, %ecx
180; X86-NEXT:    cmpb $2, %ah
181; X86-NEXT:    movl $127, %edx
182; X86-NEXT:    cmovll %ecx, %edx
183; X86-NEXT:    cmpb $-2, %ah
184; X86-NEXT:    movl $128, %eax
185; X86-NEXT:    cmovgel %edx, %eax
186; X86-NEXT:    sarb $4, %al
187; X86-NEXT:    # kill: def $al killed $al killed $eax
188; X86-NEXT:    retl
; Non-byte-sized type: i4 operands are legalized by shifting into the top of
; an i8 (shlb $4 / sarb $4 sign-extension) before the scale-2 multiply and
; the clamp to the promoted signed range.
189  %tmp = call i4 @llvm.smul.fix.sat.i4(i4 %x, i4 %y, i32 2)
190  ret i4 %tmp
191}
192
193define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
194; X64-LABEL: vec:
195; X64:       # %bb.0:
196; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3]
197; X64-NEXT:    movd %xmm2, %eax
198; X64-NEXT:    cltq
199; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3]
200; X64-NEXT:    movd %xmm2, %ecx
201; X64-NEXT:    movslq %ecx, %rdx
202; X64-NEXT:    imulq %rax, %rdx
203; X64-NEXT:    movq %rdx, %rcx
204; X64-NEXT:    shrq $32, %rcx
205; X64-NEXT:    shrdl $2, %ecx, %edx
206; X64-NEXT:    cmpl $2, %ecx
207; X64-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
208; X64-NEXT:    cmovgel %eax, %edx
209; X64-NEXT:    cmpl $-2, %ecx
210; X64-NEXT:    movl $-2147483648, %ecx # imm = 0x80000000
211; X64-NEXT:    cmovll %ecx, %edx
212; X64-NEXT:    movd %edx, %xmm2
213; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
214; X64-NEXT:    movd %xmm3, %edx
215; X64-NEXT:    movslq %edx, %rdx
216; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
217; X64-NEXT:    movd %xmm3, %esi
218; X64-NEXT:    movslq %esi, %rsi
219; X64-NEXT:    imulq %rdx, %rsi
220; X64-NEXT:    movq %rsi, %rdx
221; X64-NEXT:    shrq $32, %rdx
222; X64-NEXT:    shrdl $2, %edx, %esi
223; X64-NEXT:    cmpl $2, %edx
224; X64-NEXT:    cmovgel %eax, %esi
225; X64-NEXT:    cmpl $-2, %edx
226; X64-NEXT:    cmovll %ecx, %esi
227; X64-NEXT:    movd %esi, %xmm3
228; X64-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
229; X64-NEXT:    movd %xmm1, %edx
230; X64-NEXT:    movslq %edx, %rdx
231; X64-NEXT:    movd %xmm0, %esi
232; X64-NEXT:    movslq %esi, %rsi
233; X64-NEXT:    imulq %rdx, %rsi
234; X64-NEXT:    movq %rsi, %rdx
235; X64-NEXT:    shrq $32, %rdx
236; X64-NEXT:    shrdl $2, %edx, %esi
237; X64-NEXT:    cmpl $2, %edx
238; X64-NEXT:    cmovgel %eax, %esi
239; X64-NEXT:    cmpl $-2, %edx
240; X64-NEXT:    cmovll %ecx, %esi
241; X64-NEXT:    movd %esi, %xmm2
242; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
243; X64-NEXT:    movd %xmm1, %edx
244; X64-NEXT:    movslq %edx, %rdx
245; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
246; X64-NEXT:    movd %xmm0, %esi
247; X64-NEXT:    movslq %esi, %rsi
248; X64-NEXT:    imulq %rdx, %rsi
249; X64-NEXT:    movq %rsi, %rdx
250; X64-NEXT:    shrq $32, %rdx
251; X64-NEXT:    shrdl $2, %edx, %esi
252; X64-NEXT:    cmpl $2, %edx
253; X64-NEXT:    cmovgel %eax, %esi
254; X64-NEXT:    cmpl $-2, %edx
255; X64-NEXT:    cmovll %ecx, %esi
256; X64-NEXT:    movd %esi, %xmm0
257; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
258; X64-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
259; X64-NEXT:    movdqa %xmm2, %xmm0
260; X64-NEXT:    retq
261;
262; X86-LABEL: vec:
263; X86:       # %bb.0:
264; X86-NEXT:    pushl %ebp
265; X86-NEXT:    pushl %ebx
266; X86-NEXT:    pushl %edi
267; X86-NEXT:    pushl %esi
268; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
269; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
270; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
271; X86-NEXT:    imull {{[0-9]+}}(%esp)
272; X86-NEXT:    movl %eax, %ecx
273; X86-NEXT:    shrdl $2, %edx, %ecx
274; X86-NEXT:    cmpl $2, %edx
275; X86-NEXT:    movl $2147483647, %ebp # imm = 0x7FFFFFFF
276; X86-NEXT:    cmovgel %ebp, %ecx
277; X86-NEXT:    cmpl $-2, %edx
278; X86-NEXT:    movl $-2147483648, %esi # imm = 0x80000000
279; X86-NEXT:    cmovll %esi, %ecx
280; X86-NEXT:    movl %edi, %eax
281; X86-NEXT:    imull {{[0-9]+}}(%esp)
282; X86-NEXT:    movl %eax, %edi
283; X86-NEXT:    shrdl $2, %edx, %edi
284; X86-NEXT:    cmpl $2, %edx
285; X86-NEXT:    cmovgel %ebp, %edi
286; X86-NEXT:    cmpl $-2, %edx
287; X86-NEXT:    cmovll %esi, %edi
288; X86-NEXT:    movl %ebx, %eax
289; X86-NEXT:    imull {{[0-9]+}}(%esp)
290; X86-NEXT:    movl %eax, %ebx
291; X86-NEXT:    shrdl $2, %edx, %ebx
292; X86-NEXT:    cmpl $2, %edx
293; X86-NEXT:    cmovgel %ebp, %ebx
294; X86-NEXT:    cmpl $-2, %edx
295; X86-NEXT:    cmovll %esi, %ebx
296; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
297; X86-NEXT:    imull {{[0-9]+}}(%esp)
298; X86-NEXT:    shrdl $2, %edx, %eax
299; X86-NEXT:    cmpl $2, %edx
300; X86-NEXT:    cmovgel %ebp, %eax
301; X86-NEXT:    cmpl $-2, %edx
302; X86-NEXT:    cmovll %esi, %eax
303; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
304; X86-NEXT:    movl %eax, 12(%edx)
305; X86-NEXT:    movl %ebx, 8(%edx)
306; X86-NEXT:    movl %edi, 4(%edx)
307; X86-NEXT:    movl %ecx, (%edx)
308; X86-NEXT:    movl %edx, %eax
309; X86-NEXT:    popl %esi
310; X86-NEXT:    popl %edi
311; X86-NEXT:    popl %ebx
312; X86-NEXT:    retl $4
; Vector form with scale = 2: there is no vector lowering here, so the
; intrinsic is scalarized to four independent lane multiplies, each with
; its own shift-by-2 and clamp. On i686 the result is returned via sret
; (pointer in the first stack slot, hence retl $4).
314  %tmp = call <4 x i32> @llvm.smul.fix.sat.v4i32(<4 x i32> %x, <4 x i32> %y, i32 2)
315  ret <4 x i32> %tmp
316}
317
318; These result in regular integer multiplication
319define i32 @func4(i32 %x, i32 %y) nounwind {
320; X64-LABEL: func4:
321; X64:       # %bb.0:
322; X64-NEXT:    xorl %eax, %eax
323; X64-NEXT:    movl %edi, %ecx
324; X64-NEXT:    xorl %esi, %ecx
325; X64-NEXT:    sets %al
326; X64-NEXT:    addl $2147483647, %eax # imm = 0x7FFFFFFF
327; X64-NEXT:    imull %esi, %edi
328; X64-NEXT:    cmovnol %edi, %eax
329; X64-NEXT:    retq
330;
331; X86-LABEL: func4:
332; X86:       # %bb.0:
333; X86-NEXT:    pushl %esi
334; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
335; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
336; X86-NEXT:    xorl %ecx, %ecx
337; X86-NEXT:    movl %eax, %esi
338; X86-NEXT:    xorl %edx, %esi
339; X86-NEXT:    sets %cl
340; X86-NEXT:    addl $2147483647, %ecx # imm = 0x7FFFFFFF
341; X86-NEXT:    imull %edx, %eax
342; X86-NEXT:    cmovol %ecx, %eax
343; X86-NEXT:    popl %esi
344; X86-NEXT:    retl
; Scale = 0 degenerates to a plain saturating signed multiply: the sign of
; x^y selects the saturation bound (INT32_MAX or INT32_MIN), used only when
; imull sets OF.
345  %tmp = call i32 @llvm.smul.fix.sat.i32(i32 %x, i32 %y, i32 0)
346  ret i32 %tmp
347}
348
349define i64 @func5(i64 %x, i64 %y) {
350; X64-LABEL: func5:
351; X64:       # %bb.0:
352; X64-NEXT:    xorl %ecx, %ecx
353; X64-NEXT:    movq %rdi, %rax
354; X64-NEXT:    xorq %rsi, %rax
355; X64-NEXT:    sets %cl
356; X64-NEXT:    movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
357; X64-NEXT:    addq %rcx, %rax
358; X64-NEXT:    imulq %rsi, %rdi
359; X64-NEXT:    cmovnoq %rdi, %rax
360; X64-NEXT:    retq
361;
362; X86-LABEL: func5:
363; X86:       # %bb.0:
364; X86-NEXT:    pushl %ebp
365; X86-NEXT:    .cfi_def_cfa_offset 8
366; X86-NEXT:    pushl %ebx
367; X86-NEXT:    .cfi_def_cfa_offset 12
368; X86-NEXT:    pushl %edi
369; X86-NEXT:    .cfi_def_cfa_offset 16
370; X86-NEXT:    pushl %esi
371; X86-NEXT:    .cfi_def_cfa_offset 20
372; X86-NEXT:    subl $8, %esp
373; X86-NEXT:    .cfi_def_cfa_offset 28
374; X86-NEXT:    .cfi_offset %esi, -20
375; X86-NEXT:    .cfi_offset %edi, -16
376; X86-NEXT:    .cfi_offset %ebx, -12
377; X86-NEXT:    .cfi_offset %ebp, -8
378; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
379; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
380; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
381; X86-NEXT:    movl %ecx, %esi
382; X86-NEXT:    sarl $31, %esi
383; X86-NEXT:    imull %edi, %esi
384; X86-NEXT:    mull %edi
385; X86-NEXT:    movl %edx, %ebx
386; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
387; X86-NEXT:    movl %ecx, %eax
388; X86-NEXT:    mull %edi
389; X86-NEXT:    movl %edx, %ebp
390; X86-NEXT:    addl %eax, %ebx
391; X86-NEXT:    movl %ebx, (%esp) # 4-byte Spill
392; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
393; X86-NEXT:    adcl %esi, %ebp
394; X86-NEXT:    movl %ebp, %edi
395; X86-NEXT:    sarl $31, %edi
396; X86-NEXT:    movl %edx, %esi
397; X86-NEXT:    sarl $31, %esi
398; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
399; X86-NEXT:    imull %eax, %esi
400; X86-NEXT:    mull %edx
401; X86-NEXT:    movl %edx, %ebx
402; X86-NEXT:    addl %eax, (%esp) # 4-byte Folded Spill
403; X86-NEXT:    adcl %esi, %ebx
404; X86-NEXT:    movl %ebx, %esi
405; X86-NEXT:    sarl $31, %esi
406; X86-NEXT:    addl %ebp, %ebx
407; X86-NEXT:    adcl %edi, %esi
408; X86-NEXT:    movl %ecx, %eax
409; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
410; X86-NEXT:    imull %ebp
411; X86-NEXT:    addl %ebx, %eax
412; X86-NEXT:    adcl %esi, %edx
413; X86-NEXT:    movl (%esp), %edi # 4-byte Reload
414; X86-NEXT:    movl %edi, %esi
415; X86-NEXT:    sarl $31, %esi
416; X86-NEXT:    xorl %esi, %edx
417; X86-NEXT:    xorl %eax, %esi
418; X86-NEXT:    xorl %ebp, %ecx
419; X86-NEXT:    sarl $31, %ecx
420; X86-NEXT:    movl %ecx, %ebx
421; X86-NEXT:    xorl $2147483647, %ebx # imm = 0x7FFFFFFF
422; X86-NEXT:    orl %edx, %esi
423; X86-NEXT:    notl %ecx
424; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
425; X86-NEXT:    cmovel %edi, %ebx
426; X86-NEXT:    movl %ecx, %eax
427; X86-NEXT:    movl %ebx, %edx
428; X86-NEXT:    addl $8, %esp
429; X86-NEXT:    .cfi_def_cfa_offset 20
430; X86-NEXT:    popl %esi
431; X86-NEXT:    .cfi_def_cfa_offset 16
432; X86-NEXT:    popl %edi
433; X86-NEXT:    .cfi_def_cfa_offset 12
434; X86-NEXT:    popl %ebx
435; X86-NEXT:    .cfi_def_cfa_offset 8
436; X86-NEXT:    popl %ebp
437; X86-NEXT:    .cfi_def_cfa_offset 4
438; X86-NEXT:    retl
; i64 saturating multiply (scale = 0). Note this function deliberately lacks
; the `nounwind` attribute the other tests carry, so the i686 output also
; checks the emitted .cfi_* unwind directives.
439  %tmp = call i64 @llvm.smul.fix.sat.i64(i64 %x, i64 %y, i32 0)
440  ret i64 %tmp
441}
442
443define i4 @func6(i4 %x, i4 %y) nounwind {
444; X64-LABEL: func6:
445; X64:       # %bb.0:
446; X64-NEXT:    movl %edi, %eax
447; X64-NEXT:    shlb $4, %sil
448; X64-NEXT:    sarb $4, %sil
449; X64-NEXT:    shlb $4, %al
450; X64-NEXT:    xorl %ecx, %ecx
451; X64-NEXT:    movl %eax, %edx
452; X64-NEXT:    xorb %sil, %dl
453; X64-NEXT:    sets %cl
454; X64-NEXT:    addl $127, %ecx
455; X64-NEXT:    # kill: def $al killed $al killed $eax
456; X64-NEXT:    imulb %sil
457; X64-NEXT:    movzbl %al, %eax
458; X64-NEXT:    cmovol %ecx, %eax
459; X64-NEXT:    sarb $4, %al
460; X64-NEXT:    # kill: def $al killed $al killed $eax
461; X64-NEXT:    retq
462;
463; X86-LABEL: func6:
464; X86:       # %bb.0:
465; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
466; X86-NEXT:    shlb $4, %cl
467; X86-NEXT:    sarb $4, %cl
468; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
469; X86-NEXT:    shlb $4, %al
470; X86-NEXT:    xorl %edx, %edx
471; X86-NEXT:    movb %al, %ah
472; X86-NEXT:    xorb %cl, %ah
473; X86-NEXT:    sets %dl
474; X86-NEXT:    addl $127, %edx
475; X86-NEXT:    imulb %cl
476; X86-NEXT:    movzbl %al, %eax
477; X86-NEXT:    cmovol %edx, %eax
478; X86-NEXT:    sarb $4, %al
479; X86-NEXT:    # kill: def $al killed $al killed $eax
480; X86-NEXT:    retl
; i4 saturating multiply with scale = 0: one operand is pre-shifted into the
; high bits of an i8 so a single imulb plus an OF-based cmov performs the
; saturation in the promoted width.
481  %tmp = call i4 @llvm.smul.fix.sat.i4(i4 %x, i4 %y, i32 0)
482  ret i4 %tmp
483}
484
485define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
486; X64-LABEL: vec2:
487; X64:       # %bb.0:
488; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3]
489; X64-NEXT:    movd %xmm2, %eax
490; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3]
491; X64-NEXT:    movd %xmm2, %ecx
492; X64-NEXT:    xorl %edx, %edx
493; X64-NEXT:    movl %ecx, %esi
494; X64-NEXT:    xorl %eax, %esi
495; X64-NEXT:    sets %dl
496; X64-NEXT:    addl $2147483647, %edx # imm = 0x7FFFFFFF
497; X64-NEXT:    imull %eax, %ecx
498; X64-NEXT:    cmovol %edx, %ecx
499; X64-NEXT:    movd %ecx, %xmm2
500; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
501; X64-NEXT:    movd %xmm3, %eax
502; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
503; X64-NEXT:    movd %xmm3, %ecx
504; X64-NEXT:    xorl %edx, %edx
505; X64-NEXT:    movl %ecx, %esi
506; X64-NEXT:    xorl %eax, %esi
507; X64-NEXT:    sets %dl
508; X64-NEXT:    addl $2147483647, %edx # imm = 0x7FFFFFFF
509; X64-NEXT:    imull %eax, %ecx
510; X64-NEXT:    cmovol %edx, %ecx
511; X64-NEXT:    movd %ecx, %xmm3
512; X64-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
513; X64-NEXT:    movd %xmm1, %eax
514; X64-NEXT:    movd %xmm0, %ecx
515; X64-NEXT:    xorl %edx, %edx
516; X64-NEXT:    movl %ecx, %esi
517; X64-NEXT:    xorl %eax, %esi
518; X64-NEXT:    sets %dl
519; X64-NEXT:    addl $2147483647, %edx # imm = 0x7FFFFFFF
520; X64-NEXT:    imull %eax, %ecx
521; X64-NEXT:    cmovol %edx, %ecx
522; X64-NEXT:    movd %ecx, %xmm2
523; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
524; X64-NEXT:    movd %xmm1, %eax
525; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
526; X64-NEXT:    movd %xmm0, %ecx
527; X64-NEXT:    xorl %edx, %edx
528; X64-NEXT:    movl %ecx, %esi
529; X64-NEXT:    xorl %eax, %esi
530; X64-NEXT:    sets %dl
531; X64-NEXT:    addl $2147483647, %edx # imm = 0x7FFFFFFF
532; X64-NEXT:    imull %eax, %ecx
533; X64-NEXT:    cmovol %edx, %ecx
534; X64-NEXT:    movd %ecx, %xmm0
535; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
536; X64-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
537; X64-NEXT:    movdqa %xmm2, %xmm0
538; X64-NEXT:    retq
539;
540; X86-LABEL: vec2:
541; X86:       # %bb.0:
542; X86-NEXT:    pushl %ebp
543; X86-NEXT:    pushl %ebx
544; X86-NEXT:    pushl %edi
545; X86-NEXT:    pushl %esi
546; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
547; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
548; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
549; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
550; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
551; X86-NEXT:    xorl %eax, %eax
552; X86-NEXT:    movl %ecx, %edx
553; X86-NEXT:    xorl %edi, %edx
554; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
555; X86-NEXT:    sets %al
556; X86-NEXT:    addl $2147483647, %eax # imm = 0x7FFFFFFF
557; X86-NEXT:    imull %edi, %ecx
558; X86-NEXT:    cmovol %eax, %ecx
559; X86-NEXT:    xorl %eax, %eax
560; X86-NEXT:    movl %edx, %edi
561; X86-NEXT:    xorl %ebx, %edi
562; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
563; X86-NEXT:    sets %al
564; X86-NEXT:    addl $2147483647, %eax # imm = 0x7FFFFFFF
565; X86-NEXT:    imull %ebx, %edx
566; X86-NEXT:    cmovol %eax, %edx
567; X86-NEXT:    xorl %eax, %eax
568; X86-NEXT:    movl %edi, %ebx
569; X86-NEXT:    xorl %esi, %ebx
570; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
571; X86-NEXT:    sets %al
572; X86-NEXT:    addl $2147483647, %eax # imm = 0x7FFFFFFF
573; X86-NEXT:    imull %esi, %edi
574; X86-NEXT:    cmovol %eax, %edi
575; X86-NEXT:    xorl %eax, %eax
576; X86-NEXT:    movl %ebx, %esi
577; X86-NEXT:    xorl %ebp, %esi
578; X86-NEXT:    sets %al
579; X86-NEXT:    addl $2147483647, %eax # imm = 0x7FFFFFFF
580; X86-NEXT:    imull %ebp, %ebx
581; X86-NEXT:    cmovol %eax, %ebx
582; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
583; X86-NEXT:    movl %ebx, 12(%eax)
584; X86-NEXT:    movl %edi, 8(%eax)
585; X86-NEXT:    movl %edx, 4(%eax)
586; X86-NEXT:    movl %ecx, (%eax)
587; X86-NEXT:    popl %esi
588; X86-NEXT:    popl %edi
589; X86-NEXT:    popl %ebx
590; X86-NEXT:    popl %ebp
591; X86-NEXT:    retl $4
; Vector form with scale = 0: scalarized into four independent saturating
; imull/cmovo lane multiplies (same pattern as func4 per lane).
592  %tmp = call <4 x i32> @llvm.smul.fix.sat.v4i32(<4 x i32> %x, <4 x i32> %y, i32 0)
593  ret <4 x i32> %tmp
594}
595
596define i64 @func7(i64 %x, i64 %y) nounwind {
597; X64-LABEL: func7:
598; X64:       # %bb.0:
599; X64-NEXT:    movq %rdi, %rax
600; X64-NEXT:    imulq %rsi
601; X64-NEXT:    shrdq $32, %rdx, %rax
602; X64-NEXT:    cmpq $2147483647, %rdx # imm = 0x7FFFFFFF
603; X64-NEXT:    movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
604; X64-NEXT:    cmovgq %rcx, %rax
605; X64-NEXT:    cmpq $-2147483648, %rdx # imm = 0x80000000
606; X64-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
607; X64-NEXT:    cmovlq %rcx, %rax
608; X64-NEXT:    retq
609;
610; X86-LABEL: func7:
611; X86:       # %bb.0:
612; X86-NEXT:    pushl %ebp
613; X86-NEXT:    pushl %ebx
614; X86-NEXT:    pushl %edi
615; X86-NEXT:    pushl %esi
616; X86-NEXT:    pushl %eax
617; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
618; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
619; X86-NEXT:    movl %ecx, %eax
620; X86-NEXT:    mull {{[0-9]+}}(%esp)
621; X86-NEXT:    movl %edx, %edi
622; X86-NEXT:    movl %eax, %ebp
623; X86-NEXT:    movl %ecx, %eax
624; X86-NEXT:    mull {{[0-9]+}}(%esp)
625; X86-NEXT:    addl %edx, %ebp
626; X86-NEXT:    adcl $0, %edi
627; X86-NEXT:    movl %esi, %eax
628; X86-NEXT:    imull {{[0-9]+}}(%esp)
629; X86-NEXT:    movl %edx, %ebx
630; X86-NEXT:    movl %eax, %ecx
631; X86-NEXT:    movl %esi, %eax
632; X86-NEXT:    mull {{[0-9]+}}(%esp)
633; X86-NEXT:    addl %ebp, %eax
634; X86-NEXT:    adcl %edi, %edx
635; X86-NEXT:    adcl $0, %ebx
636; X86-NEXT:    addl %ecx, %edx
637; X86-NEXT:    adcl $0, %ebx
638; X86-NEXT:    movl %edx, %ecx
639; X86-NEXT:    subl {{[0-9]+}}(%esp), %ecx
640; X86-NEXT:    movl %ebx, %esi
641; X86-NEXT:    sbbl $0, %esi
642; X86-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
643; X86-NEXT:    cmovnsl %ebx, %esi
644; X86-NEXT:    cmovsl %ecx, %edx
645; X86-NEXT:    movl %edx, %edi
646; X86-NEXT:    subl {{[0-9]+}}(%esp), %edi
647; X86-NEXT:    movl %esi, %ecx
648; X86-NEXT:    sbbl $0, %ecx
649; X86-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
650; X86-NEXT:    cmovnsl %esi, %ecx
651; X86-NEXT:    cmovsl %edi, %edx
652; X86-NEXT:    testl %edx, %edx
653; X86-NEXT:    setns {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
654; X86-NEXT:    sets %bh
655; X86-NEXT:    testl %ecx, %ecx
656; X86-NEXT:    setg {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
657; X86-NEXT:    sete %bl
658; X86-NEXT:    andb %bh, %bl
659; X86-NEXT:    orb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Folded Reload
660; X86-NEXT:    movl $2147483647, %esi # imm = 0x7FFFFFFF
661; X86-NEXT:    cmovnel %esi, %edx
662; X86-NEXT:    movl $-1, %esi
663; X86-NEXT:    cmovnel %esi, %eax
664; X86-NEXT:    cmpl $-1, %ecx
665; X86-NEXT:    setl %cl
666; X86-NEXT:    sete %ch
667; X86-NEXT:    andb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Folded Reload
668; X86-NEXT:    xorl %esi, %esi
669; X86-NEXT:    orb %cl, %ch
670; X86-NEXT:    cmovnel %esi, %eax
671; X86-NEXT:    movl $-2147483648, %ecx # imm = 0x80000000
672; X86-NEXT:    cmovnel %ecx, %edx
673; X86-NEXT:    addl $4, %esp
674; X86-NEXT:    popl %esi
675; X86-NEXT:    popl %edi
676; X86-NEXT:    popl %ebx
677; X86-NEXT:    popl %ebp
678; X86-NEXT:    retl
; Scale = 32 (half the bit width): on x86-64 the result is simply the middle
; 64 bits of the 128-bit product (shrdq $32), with the top half compared
; against the 32-bit bounds to detect saturation.
679  %tmp = call i64 @llvm.smul.fix.sat.i64(i64 %x, i64 %y, i32 32)
680  ret i64 %tmp
681}
682
683define i64 @func8(i64 %x, i64 %y) nounwind {
684; X64-LABEL: func8:
685; X64:       # %bb.0:
686; X64-NEXT:    movq %rdi, %rax
687; X64-NEXT:    imulq %rsi
688; X64-NEXT:    shrdq $63, %rdx, %rax
689; X64-NEXT:    movabsq $4611686018427387903, %rcx # imm = 0x3FFFFFFFFFFFFFFF
690; X64-NEXT:    cmpq %rcx, %rdx
691; X64-NEXT:    movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
692; X64-NEXT:    cmovgq %rcx, %rax
693; X64-NEXT:    movabsq $-4611686018427387904, %rcx # imm = 0xC000000000000000
694; X64-NEXT:    cmpq %rcx, %rdx
695; X64-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
696; X64-NEXT:    cmovlq %rcx, %rax
697; X64-NEXT:    retq
698;
699; X86-LABEL: func8:
700; X86:       # %bb.0:
701; X86-NEXT:    pushl %ebp
702; X86-NEXT:    pushl %ebx
703; X86-NEXT:    pushl %edi
704; X86-NEXT:    pushl %esi
705; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
706; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
707; X86-NEXT:    movl %ecx, %eax
708; X86-NEXT:    mull {{[0-9]+}}(%esp)
709; X86-NEXT:    movl %edx, %edi
710; X86-NEXT:    movl %eax, %ebp
711; X86-NEXT:    movl %ecx, %eax
712; X86-NEXT:    mull {{[0-9]+}}(%esp)
713; X86-NEXT:    addl %edx, %ebp
714; X86-NEXT:    adcl $0, %edi
715; X86-NEXT:    movl %esi, %eax
716; X86-NEXT:    imull {{[0-9]+}}(%esp)
717; X86-NEXT:    movl %edx, %ebx
718; X86-NEXT:    movl %eax, %ecx
719; X86-NEXT:    movl %esi, %eax
720; X86-NEXT:    mull {{[0-9]+}}(%esp)
721; X86-NEXT:    addl %ebp, %eax
722; X86-NEXT:    adcl %edx, %edi
723; X86-NEXT:    adcl $0, %ebx
724; X86-NEXT:    addl %ecx, %edi
725; X86-NEXT:    adcl $0, %ebx
726; X86-NEXT:    movl %edi, %edx
727; X86-NEXT:    subl {{[0-9]+}}(%esp), %edx
728; X86-NEXT:    movl %ebx, %ebp
729; X86-NEXT:    sbbl $0, %ebp
730; X86-NEXT:    testl %esi, %esi
731; X86-NEXT:    cmovnsl %ebx, %ebp
732; X86-NEXT:    cmovnsl %edi, %edx
733; X86-NEXT:    movl %edx, %esi
734; X86-NEXT:    subl {{[0-9]+}}(%esp), %esi
735; X86-NEXT:    movl %ebp, %ecx
736; X86-NEXT:    sbbl $0, %ecx
737; X86-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
738; X86-NEXT:    cmovnsl %ebp, %ecx
739; X86-NEXT:    cmovnsl %edx, %esi
740; X86-NEXT:    shrdl $31, %esi, %eax
741; X86-NEXT:    shrdl $31, %ecx, %esi
742; X86-NEXT:    cmpl $1073741824, %ecx # imm = 0x40000000
743; X86-NEXT:    movl $2147483647, %edi # imm = 0x7FFFFFFF
744; X86-NEXT:    cmovll %esi, %edi
745; X86-NEXT:    movl $-1, %edx
746; X86-NEXT:    cmovgel %edx, %eax
747; X86-NEXT:    xorl %edx, %edx
748; X86-NEXT:    cmpl $-1073741824, %ecx # imm = 0xC0000000
749; X86-NEXT:    cmovll %edx, %eax
750; X86-NEXT:    movl $-2147483648, %edx # imm = 0x80000000
751; X86-NEXT:    cmovgel %edi, %edx
752; X86-NEXT:    popl %esi
753; X86-NEXT:    popl %edi
754; X86-NEXT:    popl %ebx
755; X86-NEXT:    popl %ebp
756; X86-NEXT:    retl
; Scale = 63, the maximum legal value (bit width - 1): the shift takes all
; but one bit from the high half of the product, and the saturation bounds
; become +/-2^62 on the high word.
757  %tmp = call i64 @llvm.smul.fix.sat.i64(i64 %x, i64 %y, i32 63)
758  ret i64 %tmp
759}
760