xref: /llvm-project/llvm/test/CodeGen/X86/sdiv_fix.ll (revision 401d123a1fdcbbf4ae7a20178957b7e3a625c044)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
3; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86
4
; Declarations of the llvm.sdiv.fix intrinsic for every scalar and vector type
; that the functions in this file call. Per the LLVM Language Reference, the
; first two operands are the fixed-point dividend and divisor and the trailing
; i32 operand is the scale (the number of fractional bits).
5declare  i4  @llvm.sdiv.fix.i4   (i4,  i4,  i32)
6declare  i15 @llvm.sdiv.fix.i15  (i15, i15, i32)
7declare  i16 @llvm.sdiv.fix.i16  (i16, i16, i32)
8declare  i18 @llvm.sdiv.fix.i18  (i18, i18, i32)
9declare  i64 @llvm.sdiv.fix.i64  (i64, i64, i32)
10declare  <4 x i32> @llvm.sdiv.fix.v4i32(<4 x i32>, <4 x i32>, i32)
11
; Signed fixed-point division of two i16 values with a scale of 7.
; Both runs are expected to sign-extend the operands to 32 bits (movswl),
; shift the dividend left by the scale (shll $7) and use one 32-bit idivl.
; The quotient minus one (leal -1) is selected by cmovnel only when the
; remainder is non-zero and the sign bits of dividend and divisor differ.
12define i16 @func(i16 %x, i16 %y) nounwind {
13; X64-LABEL: func:
14; X64:       # %bb.0:
15; X64-NEXT:    movswl %si, %esi
16; X64-NEXT:    movswl %di, %ecx
17; X64-NEXT:    shll $7, %ecx
18; X64-NEXT:    movl %ecx, %eax
19; X64-NEXT:    cltd
20; X64-NEXT:    idivl %esi
21; X64-NEXT:    # kill: def $eax killed $eax def $rax
22; X64-NEXT:    leal -1(%rax), %edi
23; X64-NEXT:    testl %esi, %esi
24; X64-NEXT:    sets %sil
25; X64-NEXT:    testl %ecx, %ecx
26; X64-NEXT:    sets %cl
27; X64-NEXT:    xorb %sil, %cl
28; X64-NEXT:    testl %edx, %edx
29; X64-NEXT:    setne %dl
30; X64-NEXT:    testb %cl, %dl
31; X64-NEXT:    cmovnel %edi, %eax
32; X64-NEXT:    # kill: def $ax killed $ax killed $rax
33; X64-NEXT:    retq
34;
35; X86-LABEL: func:
36; X86:       # %bb.0:
37; X86-NEXT:    pushl %ebx
38; X86-NEXT:    pushl %edi
39; X86-NEXT:    pushl %esi
40; X86-NEXT:    movswl {{[0-9]+}}(%esp), %esi
41; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
42; X86-NEXT:    shll $7, %ecx
43; X86-NEXT:    movl %ecx, %eax
44; X86-NEXT:    cltd
45; X86-NEXT:    idivl %esi
46; X86-NEXT:    leal -1(%eax), %edi
47; X86-NEXT:    testl %esi, %esi
48; X86-NEXT:    sets %bl
49; X86-NEXT:    testl %ecx, %ecx
50; X86-NEXT:    sets %cl
51; X86-NEXT:    xorb %bl, %cl
52; X86-NEXT:    testl %edx, %edx
53; X86-NEXT:    setne %dl
54; X86-NEXT:    testb %cl, %dl
55; X86-NEXT:    cmovnel %edi, %eax
56; X86-NEXT:    # kill: def $ax killed $ax killed $eax
57; X86-NEXT:    popl %esi
58; X86-NEXT:    popl %edi
59; X86-NEXT:    popl %ebx
60; X86-NEXT:    retl
61  %tmp = call i16 @llvm.sdiv.fix.i16(i16 %x, i16 %y, i32 7)
62  ret i16 %tmp
63}
64
; The i8 arguments are sign-extended to i15, divided with a scale of 14, and
; the i15 result is sign-extended to i16 before being returned.
; The checks expect a 32-bit idivl after shll $14, the same remainder/sign
; based choice between the quotient and quotient - 1 as in func (expressed
; here with cmovel), and a trailing addl/movswl/shrl sequence applied to the
; selected value before it is returned in %ax.
65define i16 @func2(i8 %x, i8 %y) nounwind {
66; X64-LABEL: func2:
67; X64:       # %bb.0:
68; X64-NEXT:    movsbl %sil, %esi
69; X64-NEXT:    movsbl %dil, %ecx
70; X64-NEXT:    shll $14, %ecx
71; X64-NEXT:    movl %ecx, %eax
72; X64-NEXT:    cltd
73; X64-NEXT:    idivl %esi
74; X64-NEXT:    # kill: def $eax killed $eax def $rax
75; X64-NEXT:    leal -1(%rax), %edi
76; X64-NEXT:    testl %esi, %esi
77; X64-NEXT:    sets %sil
78; X64-NEXT:    testl %ecx, %ecx
79; X64-NEXT:    sets %cl
80; X64-NEXT:    xorb %sil, %cl
81; X64-NEXT:    testl %edx, %edx
82; X64-NEXT:    setne %dl
83; X64-NEXT:    testb %cl, %dl
84; X64-NEXT:    cmovel %eax, %edi
85; X64-NEXT:    addl %edi, %edi
86; X64-NEXT:    movswl %di, %eax
87; X64-NEXT:    shrl %eax
88; X64-NEXT:    # kill: def $ax killed $ax killed $eax
89; X64-NEXT:    retq
90;
91; X86-LABEL: func2:
92; X86:       # %bb.0:
93; X86-NEXT:    pushl %ebx
94; X86-NEXT:    pushl %edi
95; X86-NEXT:    pushl %esi
96; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %edi
97; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
98; X86-NEXT:    shll $14, %ecx
99; X86-NEXT:    movl %ecx, %eax
100; X86-NEXT:    cltd
101; X86-NEXT:    idivl %edi
102; X86-NEXT:    leal -1(%eax), %esi
103; X86-NEXT:    testl %edi, %edi
104; X86-NEXT:    sets %bl
105; X86-NEXT:    testl %ecx, %ecx
106; X86-NEXT:    sets %cl
107; X86-NEXT:    xorb %bl, %cl
108; X86-NEXT:    testl %edx, %edx
109; X86-NEXT:    setne %dl
110; X86-NEXT:    testb %cl, %dl
111; X86-NEXT:    cmovel %eax, %esi
112; X86-NEXT:    addl %esi, %esi
113; X86-NEXT:    movswl %si, %eax
114; X86-NEXT:    shrl %eax
115; X86-NEXT:    # kill: def $ax killed $ax killed $eax
116; X86-NEXT:    popl %esi
117; X86-NEXT:    popl %edi
118; X86-NEXT:    popl %ebx
119; X86-NEXT:    retl
120  %x2 = sext i8 %x to i15
121  %y2 = sext i8 %y to i15
122  %tmp = call i15 @llvm.sdiv.fix.i15(i15 %x2, i15 %y2, i32 14)
123  %tmp2 = sext i15 %tmp to i16
124  ret i16 %tmp2
125}
126
; i15 division with a scale of 4 where the divisor is an i8 that is
; sign-extended to i15 and then shifted left by 7; the i15 result is
; sign-extended to i16. Unlike func2, the checks expect a 16-bit divide
; (cwtd + idivw) and 16-bit tests (testw) for the sign and remainder
; conditions that choose between the quotient and quotient - 1.
127define i16 @func3(i15 %x, i8 %y) nounwind {
128; X64-LABEL: func3:
129; X64:       # %bb.0:
130; X64-NEXT:    shll $8, %esi
131; X64-NEXT:    movswl %si, %ecx
132; X64-NEXT:    addl %edi, %edi
133; X64-NEXT:    shrl $4, %ecx
134; X64-NEXT:    movl %edi, %eax
135; X64-NEXT:    cwtd
136; X64-NEXT:    idivw %cx
137; X64-NEXT:    # kill: def $ax killed $ax def $rax
138; X64-NEXT:    leal -1(%rax), %esi
139; X64-NEXT:    testw %di, %di
140; X64-NEXT:    sets %dil
141; X64-NEXT:    testw %cx, %cx
142; X64-NEXT:    sets %cl
143; X64-NEXT:    xorb %dil, %cl
144; X64-NEXT:    testw %dx, %dx
145; X64-NEXT:    setne %dl
146; X64-NEXT:    testb %cl, %dl
147; X64-NEXT:    cmovel %eax, %esi
148; X64-NEXT:    addl %esi, %esi
149; X64-NEXT:    movswl %si, %eax
150; X64-NEXT:    shrl %eax
151; X64-NEXT:    # kill: def $ax killed $ax killed $eax
152; X64-NEXT:    retq
153;
154; X86-LABEL: func3:
155; X86:       # %bb.0:
156; X86-NEXT:    pushl %edi
157; X86-NEXT:    pushl %esi
158; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
159; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
160; X86-NEXT:    shll $8, %eax
161; X86-NEXT:    movswl %ax, %esi
162; X86-NEXT:    addl %ecx, %ecx
163; X86-NEXT:    shrl $4, %esi
164; X86-NEXT:    movl %ecx, %eax
165; X86-NEXT:    cwtd
166; X86-NEXT:    idivw %si
167; X86-NEXT:    # kill: def $ax killed $ax def $eax
168; X86-NEXT:    leal -1(%eax), %edi
169; X86-NEXT:    testw %cx, %cx
170; X86-NEXT:    sets %cl
171; X86-NEXT:    testw %si, %si
172; X86-NEXT:    sets %ch
173; X86-NEXT:    xorb %cl, %ch
174; X86-NEXT:    testw %dx, %dx
175; X86-NEXT:    setne %cl
176; X86-NEXT:    testb %ch, %cl
177; X86-NEXT:    cmovel %eax, %edi
178; X86-NEXT:    addl %edi, %edi
179; X86-NEXT:    movswl %di, %eax
180; X86-NEXT:    shrl %eax
181; X86-NEXT:    # kill: def $ax killed $ax killed $eax
182; X86-NEXT:    popl %esi
183; X86-NEXT:    popl %edi
184; X86-NEXT:    retl
185  %y2 = sext i8 %y to i15
186  %y3 = shl i15 %y2, 7
187  %tmp = call i15 @llvm.sdiv.fix.i15(i15 %x, i15 %y3, i32 4)
188  %tmp2 = sext i15 %tmp to i16
189  ret i16 %tmp2
190}
191
; i4 division with a scale of 2. The checks expect each 4-bit operand to be
; sign-extended within a byte register (shlb $4 followed by sarb $4), the
; dividend to be shifted left by the scale (shlb $2), and an 8-bit idivb
; whose remainder is read back from %ah. The quotient or quotient - 1 is then
; chosen with cmovel from the remainder and operand-sign tests.
192define i4 @func4(i4 %x, i4 %y) nounwind {
193; X64-LABEL: func4:
194; X64:       # %bb.0:
195; X64-NEXT:    shlb $4, %sil
196; X64-NEXT:    sarb $4, %sil
197; X64-NEXT:    shlb $4, %dil
198; X64-NEXT:    sarb $4, %dil
199; X64-NEXT:    shlb $2, %dil
200; X64-NEXT:    movsbl %dil, %ecx
201; X64-NEXT:    movl %ecx, %eax
202; X64-NEXT:    idivb %sil
203; X64-NEXT:    movsbl %ah, %edx
204; X64-NEXT:    movzbl %al, %edi
205; X64-NEXT:    leal -1(%rdi), %eax
206; X64-NEXT:    movzbl %al, %eax
207; X64-NEXT:    testb %sil, %sil
208; X64-NEXT:    sets %sil
209; X64-NEXT:    testb %cl, %cl
210; X64-NEXT:    sets %cl
211; X64-NEXT:    xorb %sil, %cl
212; X64-NEXT:    testb %dl, %dl
213; X64-NEXT:    setne %dl
214; X64-NEXT:    testb %cl, %dl
215; X64-NEXT:    cmovel %edi, %eax
216; X64-NEXT:    # kill: def $al killed $al killed $eax
217; X64-NEXT:    retq
218;
219; X86-LABEL: func4:
220; X86:       # %bb.0:
221; X86-NEXT:    pushl %ebx
222; X86-NEXT:    pushl %esi
223; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
224; X86-NEXT:    shlb $4, %cl
225; X86-NEXT:    sarb $4, %cl
226; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
227; X86-NEXT:    shlb $4, %dl
228; X86-NEXT:    sarb $4, %dl
229; X86-NEXT:    shlb $2, %dl
230; X86-NEXT:    movsbl %dl, %eax
231; X86-NEXT:    idivb %cl
232; X86-NEXT:    movsbl %ah, %ebx
233; X86-NEXT:    movzbl %al, %esi
234; X86-NEXT:    decb %al
235; X86-NEXT:    movzbl %al, %eax
236; X86-NEXT:    testb %cl, %cl
237; X86-NEXT:    sets %cl
238; X86-NEXT:    testb %dl, %dl
239; X86-NEXT:    sets %dl
240; X86-NEXT:    xorb %cl, %dl
241; X86-NEXT:    testb %bl, %bl
242; X86-NEXT:    setne %cl
243; X86-NEXT:    testb %dl, %cl
244; X86-NEXT:    cmovel %esi, %eax
245; X86-NEXT:    # kill: def $al killed $al killed $eax
246; X86-NEXT:    popl %esi
247; X86-NEXT:    popl %ebx
248; X86-NEXT:    retl
249  %tmp = call i4 @llvm.sdiv.fix.i4(i4 %x, i4 %y, i32 2)
250  ret i4 %tmp
251}
252
; i64 division with a scale of 31. The checks show the dividend being widened
; to 128 bits before the shift by the scale (sarq $63 / shldq $31 / shlq $31
; in the x86-64 run) and both runs calling the 128-bit libcalls: __divti3 for
; the quotient and __modti3 for the remainder that drives the choice between
; the quotient and quotient - 1. In the x86-64 run the first call's result is
; spilled to the stack across the second call and folded into the final
; cmoveq; in the i686 run the libcall arguments are pushed on the stack and
; the results are read back from stack memory.
253define i64 @func5(i64 %x, i64 %y) nounwind {
254; X64-LABEL: func5:
255; X64:       # %bb.0:
256; X64-NEXT:    pushq %rbp
257; X64-NEXT:    pushq %r15
258; X64-NEXT:    pushq %r14
259; X64-NEXT:    pushq %r13
260; X64-NEXT:    pushq %r12
261; X64-NEXT:    pushq %rbx
262; X64-NEXT:    pushq %rax
263; X64-NEXT:    movq %rsi, %rbx
264; X64-NEXT:    movq %rdi, %r14
265; X64-NEXT:    movq %rdi, %r15
266; X64-NEXT:    sarq $63, %r15
267; X64-NEXT:    shldq $31, %rdi, %r15
268; X64-NEXT:    shlq $31, %r14
269; X64-NEXT:    movq %rsi, %r12
270; X64-NEXT:    sarq $63, %r12
271; X64-NEXT:    movq %r14, %rdi
272; X64-NEXT:    movq %r15, %rsi
273; X64-NEXT:    movq %rbx, %rdx
274; X64-NEXT:    movq %r12, %rcx
275; X64-NEXT:    callq __divti3@PLT
276; X64-NEXT:    movq %rax, (%rsp) # 8-byte Spill
277; X64-NEXT:    leaq -1(%rax), %rbp
278; X64-NEXT:    testq %r15, %r15
279; X64-NEXT:    sets %al
280; X64-NEXT:    testq %r12, %r12
281; X64-NEXT:    sets %r13b
282; X64-NEXT:    xorb %al, %r13b
283; X64-NEXT:    movq %r14, %rdi
284; X64-NEXT:    movq %r15, %rsi
285; X64-NEXT:    movq %rbx, %rdx
286; X64-NEXT:    movq %r12, %rcx
287; X64-NEXT:    callq __modti3@PLT
288; X64-NEXT:    orq %rax, %rdx
289; X64-NEXT:    setne %al
290; X64-NEXT:    testb %r13b, %al
291; X64-NEXT:    cmoveq (%rsp), %rbp # 8-byte Folded Reload
292; X64-NEXT:    movq %rbp, %rax
293; X64-NEXT:    addq $8, %rsp
294; X64-NEXT:    popq %rbx
295; X64-NEXT:    popq %r12
296; X64-NEXT:    popq %r13
297; X64-NEXT:    popq %r14
298; X64-NEXT:    popq %r15
299; X64-NEXT:    popq %rbp
300; X64-NEXT:    retq
301;
302; X86-LABEL: func5:
303; X86:       # %bb.0:
304; X86-NEXT:    pushl %ebp
305; X86-NEXT:    movl %esp, %ebp
306; X86-NEXT:    pushl %ebx
307; X86-NEXT:    pushl %edi
308; X86-NEXT:    pushl %esi
309; X86-NEXT:    andl $-16, %esp
310; X86-NEXT:    subl $80, %esp
311; X86-NEXT:    movl 8(%ebp), %ecx
312; X86-NEXT:    movl 12(%ebp), %eax
313; X86-NEXT:    movl 20(%ebp), %edx
314; X86-NEXT:    movl %edx, %esi
315; X86-NEXT:    sarl $31, %esi
316; X86-NEXT:    movl %eax, %edi
317; X86-NEXT:    sarl $31, %edi
318; X86-NEXT:    movl %edi, %ebx
319; X86-NEXT:    shldl $31, %eax, %ebx
320; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
321; X86-NEXT:    shldl $31, %ecx, %eax
322; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
323; X86-NEXT:    shll $31, %ecx
324; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
325; X86-NEXT:    pushl %esi
326; X86-NEXT:    pushl %esi
327; X86-NEXT:    pushl %edx
328; X86-NEXT:    pushl 16(%ebp)
329; X86-NEXT:    pushl %edi
330; X86-NEXT:    pushl %ebx
331; X86-NEXT:    pushl %eax
332; X86-NEXT:    pushl %ecx
333; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
334; X86-NEXT:    pushl %eax
335; X86-NEXT:    calll __divti3
336; X86-NEXT:    addl $32, %esp
337; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
338; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
339; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
340; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
341; X86-NEXT:    subl $1, %eax
342; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
343; X86-NEXT:    sbbl $0, %ebx
344; X86-NEXT:    testl %esi, %esi
345; X86-NEXT:    sets %al
346; X86-NEXT:    testl %edi, %edi
347; X86-NEXT:    sets %cl
348; X86-NEXT:    xorb %al, %cl
349; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
350; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
351; X86-NEXT:    pushl %esi
352; X86-NEXT:    pushl %esi
353; X86-NEXT:    pushl 20(%ebp)
354; X86-NEXT:    pushl 16(%ebp)
355; X86-NEXT:    pushl %edi
356; X86-NEXT:    pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
357; X86-NEXT:    pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
358; X86-NEXT:    pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
359; X86-NEXT:    pushl %eax
360; X86-NEXT:    calll __modti3
361; X86-NEXT:    addl $32, %esp
362; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
363; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
364; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
365; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
366; X86-NEXT:    orl %eax, %ecx
367; X86-NEXT:    setne %al
368; X86-NEXT:    testb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
369; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
370; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
371; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
372; X86-NEXT:    movl %ebx, %edx
373; X86-NEXT:    leal -12(%ebp), %esp
374; X86-NEXT:    popl %esi
375; X86-NEXT:    popl %edi
376; X86-NEXT:    popl %ebx
377; X86-NEXT:    popl %ebp
378; X86-NEXT:    retl
379  %tmp = call i64 @llvm.sdiv.fix.i64(i64 %x, i64 %y, i32 31)
380  ret i64 %tmp
381}
382
; The i16 arguments are sign-extended to i18, divided with a scale of 7, and
; the i18 result is returned directly. The expected instruction sequence
; mirrors func (movswl of both operands, shll $7, a 32-bit idivl and a
; cmovnel between the quotient and quotient - 1); the difference visible in
; the checks is that the final kill annotation of the x86-64 run names $eax
; instead of $ax, and the i686 run has no kill annotation at all.
383define i18 @func6(i16 %x, i16 %y) nounwind {
384; X64-LABEL: func6:
385; X64:       # %bb.0:
386; X64-NEXT:    movswl %di, %ecx
387; X64-NEXT:    movswl %si, %esi
388; X64-NEXT:    shll $7, %ecx
389; X64-NEXT:    movl %ecx, %eax
390; X64-NEXT:    cltd
391; X64-NEXT:    idivl %esi
392; X64-NEXT:    # kill: def $eax killed $eax def $rax
393; X64-NEXT:    leal -1(%rax), %edi
394; X64-NEXT:    testl %esi, %esi
395; X64-NEXT:    sets %sil
396; X64-NEXT:    testl %ecx, %ecx
397; X64-NEXT:    sets %cl
398; X64-NEXT:    xorb %sil, %cl
399; X64-NEXT:    testl %edx, %edx
400; X64-NEXT:    setne %dl
401; X64-NEXT:    testb %cl, %dl
402; X64-NEXT:    cmovnel %edi, %eax
403; X64-NEXT:    # kill: def $eax killed $eax killed $rax
404; X64-NEXT:    retq
405;
406; X86-LABEL: func6:
407; X86:       # %bb.0:
408; X86-NEXT:    pushl %ebx
409; X86-NEXT:    pushl %edi
410; X86-NEXT:    pushl %esi
411; X86-NEXT:    movswl {{[0-9]+}}(%esp), %esi
412; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
413; X86-NEXT:    shll $7, %ecx
414; X86-NEXT:    movl %ecx, %eax
415; X86-NEXT:    cltd
416; X86-NEXT:    idivl %esi
417; X86-NEXT:    leal -1(%eax), %edi
418; X86-NEXT:    testl %esi, %esi
419; X86-NEXT:    sets %bl
420; X86-NEXT:    testl %ecx, %ecx
421; X86-NEXT:    sets %cl
422; X86-NEXT:    xorb %bl, %cl
423; X86-NEXT:    testl %edx, %edx
424; X86-NEXT:    setne %dl
425; X86-NEXT:    testb %cl, %dl
426; X86-NEXT:    cmovnel %edi, %eax
427; X86-NEXT:    popl %esi
428; X86-NEXT:    popl %edi
429; X86-NEXT:    popl %ebx
430; X86-NEXT:    retl
431  %x2 = sext i16 %x to i18
432  %y2 = sext i16 %y to i18
433  %tmp = call i18 @llvm.sdiv.fix.i18(i18 %x2, i18 %y2, i32 7)
434  ret i18 %tmp
435}
436
; <4 x i32> division with a scale of 31.
; The x86-64 run is expected to scalarize the divide into four 64-bit idivq
; instructions, then choose between the quotient and quotient - 1 per lane
; with vector mask operations (pcmpeqd / pcmpgtd / pandn / pand / por, with
; paddq of an all-ones vector supplying the - 1) and finally pack the lanes
; with shufps.
; The i686 run is expected to make four __divdi3 and four __moddi3 libcalls,
; select each lane's result with cmovel, store the four results through a
; pointer loaded from the stack (offsets 0, 4, 8 and 12) and return with
; retl $4.
437define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
438; X64-LABEL: vec:
439; X64:       # %bb.0:
440; X64-NEXT:    pxor %xmm2, %xmm2
441; X64-NEXT:    pcmpgtd %xmm1, %xmm2
442; X64-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[2,3,2,3]
443; X64-NEXT:    movdqa %xmm1, %xmm3
444; X64-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
445; X64-NEXT:    movq %xmm3, %rcx
446; X64-NEXT:    pxor %xmm5, %xmm5
447; X64-NEXT:    pcmpgtd %xmm0, %xmm5
448; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
449; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
450; X64-NEXT:    psllq $31, %xmm0
451; X64-NEXT:    movq %xmm0, %rax
452; X64-NEXT:    cqto
453; X64-NEXT:    idivq %rcx
454; X64-NEXT:    movq %rax, %rsi
455; X64-NEXT:    movq %rdx, %r8
456; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
457; X64-NEXT:    movq %xmm3, %rcx
458; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
459; X64-NEXT:    movq %xmm3, %rax
460; X64-NEXT:    cqto
461; X64-NEXT:    idivq %rcx
462; X64-NEXT:    movq %rax, %rdi
463; X64-NEXT:    movq %rdx, %r10
464; X64-NEXT:    pxor %xmm3, %xmm3
465; X64-NEXT:    pcmpgtd %xmm4, %xmm3
466; X64-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
467; X64-NEXT:    movq %xmm4, %rcx
468; X64-NEXT:    pxor %xmm5, %xmm5
469; X64-NEXT:    pcmpgtd %xmm1, %xmm5
470; X64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1]
471; X64-NEXT:    psllq $31, %xmm1
472; X64-NEXT:    movq %xmm1, %rax
473; X64-NEXT:    cqto
474; X64-NEXT:    idivq %rcx
475; X64-NEXT:    movq %rax, %rcx
476; X64-NEXT:    movq %rdx, %r9
477; X64-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
478; X64-NEXT:    movq %xmm4, %r11
479; X64-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[2,3,2,3]
480; X64-NEXT:    movq %xmm4, %rax
481; X64-NEXT:    cqto
482; X64-NEXT:    idivq %r11
483; X64-NEXT:    movq %r8, %xmm5
484; X64-NEXT:    movq %r10, %xmm6
485; X64-NEXT:    pxor %xmm4, %xmm4
486; X64-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm6[0]
487; X64-NEXT:    pcmpeqd %xmm4, %xmm5
488; X64-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[1,0,3,2]
489; X64-NEXT:    pand %xmm5, %xmm6
490; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
491; X64-NEXT:    pxor %xmm5, %xmm5
492; X64-NEXT:    pcmpgtd %xmm2, %xmm5
493; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
494; X64-NEXT:    pxor %xmm2, %xmm2
495; X64-NEXT:    pcmpgtd %xmm0, %xmm2
496; X64-NEXT:    movq %rsi, %xmm0
497; X64-NEXT:    pxor %xmm5, %xmm2
498; X64-NEXT:    movq %rdi, %xmm5
499; X64-NEXT:    pandn %xmm2, %xmm6
500; X64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0]
501; X64-NEXT:    movdqa %xmm6, %xmm5
502; X64-NEXT:    pandn %xmm0, %xmm5
503; X64-NEXT:    pcmpeqd %xmm2, %xmm2
504; X64-NEXT:    paddq %xmm2, %xmm0
505; X64-NEXT:    pand %xmm6, %xmm0
506; X64-NEXT:    por %xmm5, %xmm0
507; X64-NEXT:    movq %r9, %xmm5
508; X64-NEXT:    movq %rdx, %xmm6
509; X64-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm6[0]
510; X64-NEXT:    pcmpeqd %xmm4, %xmm5
511; X64-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[1,0,3,2]
512; X64-NEXT:    pand %xmm5, %xmm6
513; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
514; X64-NEXT:    pxor %xmm5, %xmm5
515; X64-NEXT:    pcmpgtd %xmm3, %xmm5
516; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
517; X64-NEXT:    pcmpgtd %xmm1, %xmm4
518; X64-NEXT:    pxor %xmm5, %xmm4
519; X64-NEXT:    pandn %xmm4, %xmm6
520; X64-NEXT:    movq %rcx, %xmm1
521; X64-NEXT:    movq %rax, %xmm3
522; X64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
523; X64-NEXT:    movdqa %xmm6, %xmm3
524; X64-NEXT:    pandn %xmm1, %xmm3
525; X64-NEXT:    paddq %xmm2, %xmm1
526; X64-NEXT:    pand %xmm6, %xmm1
527; X64-NEXT:    por %xmm3, %xmm1
528; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
529; X64-NEXT:    retq
530;
531; X86-LABEL: vec:
532; X86:       # %bb.0:
533; X86-NEXT:    pushl %ebp
534; X86-NEXT:    pushl %ebx
535; X86-NEXT:    pushl %edi
536; X86-NEXT:    pushl %esi
537; X86-NEXT:    subl $60, %esp
538; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
539; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
540; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
541; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
542; X86-NEXT:    movl %esi, %ebp
543; X86-NEXT:    sarl $31, %ebp
544; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
545; X86-NEXT:    movl %ecx, %edx
546; X86-NEXT:    shll $31, %edx
547; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
548; X86-NEXT:    movl %ecx, %eax
549; X86-NEXT:    shrl $31, %eax
550; X86-NEXT:    shldl $31, %ecx, %eax
551; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
552; X86-NEXT:    pushl %ebp
553; X86-NEXT:    pushl %esi
554; X86-NEXT:    pushl %eax
555; X86-NEXT:    pushl %edx
556; X86-NEXT:    calll __divdi3
557; X86-NEXT:    addl $16, %esp
558; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
559; X86-NEXT:    movl %edi, %eax
560; X86-NEXT:    sarl $31, %eax
561; X86-NEXT:    movl %ebx, %ebp
562; X86-NEXT:    shll $31, %ebp
563; X86-NEXT:    movl %ebx, %ecx
564; X86-NEXT:    shrl $31, %ecx
565; X86-NEXT:    shldl $31, %ebx, %ecx
566; X86-NEXT:    pushl %eax
567; X86-NEXT:    movl %eax, %esi
568; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
569; X86-NEXT:    pushl %edi
570; X86-NEXT:    pushl %ecx
571; X86-NEXT:    movl %ecx, %ebx
572; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
573; X86-NEXT:    pushl %ebp
574; X86-NEXT:    calll __moddi3
575; X86-NEXT:    addl $16, %esp
576; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
577; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
578; X86-NEXT:    pushl %esi
579; X86-NEXT:    pushl %edi
580; X86-NEXT:    pushl %ebx
581; X86-NEXT:    pushl %ebp
582; X86-NEXT:    calll __divdi3
583; X86-NEXT:    addl $16, %esp
584; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
585; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
586; X86-NEXT:    movl %eax, %ecx
587; X86-NEXT:    sarl $31, %ecx
588; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
589; X86-NEXT:    movl %edx, %ebx
590; X86-NEXT:    shll $31, %ebx
591; X86-NEXT:    movl %edx, %edi
592; X86-NEXT:    shrl $31, %edi
593; X86-NEXT:    shldl $31, %edx, %edi
594; X86-NEXT:    pushl %ecx
595; X86-NEXT:    movl %ecx, %ebp
596; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
597; X86-NEXT:    pushl %eax
598; X86-NEXT:    movl %eax, %esi
599; X86-NEXT:    pushl %edi
600; X86-NEXT:    pushl %ebx
601; X86-NEXT:    calll __moddi3
602; X86-NEXT:    addl $16, %esp
603; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
604; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
605; X86-NEXT:    pushl %ebp
606; X86-NEXT:    pushl %esi
607; X86-NEXT:    pushl %edi
608; X86-NEXT:    pushl %ebx
609; X86-NEXT:    calll __divdi3
610; X86-NEXT:    addl $16, %esp
611; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
612; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
613; X86-NEXT:    movl %eax, %ebx
614; X86-NEXT:    sarl $31, %ebx
615; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
616; X86-NEXT:    movl %ecx, %esi
617; X86-NEXT:    shll $31, %esi
618; X86-NEXT:    movl %ecx, %ebp
619; X86-NEXT:    shrl $31, %ebp
620; X86-NEXT:    shldl $31, %ecx, %ebp
621; X86-NEXT:    pushl %ebx
622; X86-NEXT:    pushl %eax
623; X86-NEXT:    pushl %ebp
624; X86-NEXT:    pushl %esi
625; X86-NEXT:    calll __moddi3
626; X86-NEXT:    addl $16, %esp
627; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
628; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
629; X86-NEXT:    pushl %ebx
630; X86-NEXT:    pushl {{[0-9]+}}(%esp)
631; X86-NEXT:    pushl %ebp
632; X86-NEXT:    pushl %esi
633; X86-NEXT:    calll __divdi3
634; X86-NEXT:    addl $16, %esp
635; X86-NEXT:    testl %ebp, %ebp
636; X86-NEXT:    sets %cl
637; X86-NEXT:    testl %ebx, %ebx
638; X86-NEXT:    sets %dl
639; X86-NEXT:    xorb %cl, %dl
640; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
641; X86-NEXT:    orl (%esp), %ecx # 4-byte Folded Reload
642; X86-NEXT:    setne %cl
643; X86-NEXT:    testb %dl, %cl
644; X86-NEXT:    leal -1(%eax), %ecx
645; X86-NEXT:    cmovel %eax, %ecx
646; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
647; X86-NEXT:    testl %edi, %edi
648; X86-NEXT:    sets %al
649; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
650; X86-NEXT:    sets %cl
651; X86-NEXT:    xorb %al, %cl
652; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
653; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
654; X86-NEXT:    setne %al
655; X86-NEXT:    testb %cl, %al
656; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
657; X86-NEXT:    leal -1(%eax), %ecx
658; X86-NEXT:    cmovel %eax, %ecx
659; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
660; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
661; X86-NEXT:    sets %al
662; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
663; X86-NEXT:    sets %cl
664; X86-NEXT:    xorb %al, %cl
665; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
666; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
667; X86-NEXT:    setne %al
668; X86-NEXT:    testb %cl, %al
669; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
670; X86-NEXT:    leal -1(%eax), %ebp
671; X86-NEXT:    cmovel %eax, %ebp
672; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
673; X86-NEXT:    testl %edx, %edx
674; X86-NEXT:    sets %al
675; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
676; X86-NEXT:    testl %ecx, %ecx
677; X86-NEXT:    sets %bl
678; X86-NEXT:    xorb %al, %bl
679; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
680; X86-NEXT:    leal -1(%edi), %esi
681; X86-NEXT:    pushl %ecx
682; X86-NEXT:    pushl {{[0-9]+}}(%esp)
683; X86-NEXT:    pushl %edx
684; X86-NEXT:    pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
685; X86-NEXT:    calll __moddi3
686; X86-NEXT:    addl $16, %esp
687; X86-NEXT:    orl %eax, %edx
688; X86-NEXT:    setne %al
689; X86-NEXT:    testb %bl, %al
690; X86-NEXT:    cmovel %edi, %esi
691; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
692; X86-NEXT:    movl %esi, 12(%eax)
693; X86-NEXT:    movl %ebp, 8(%eax)
694; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
695; X86-NEXT:    movl %ecx, 4(%eax)
696; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
697; X86-NEXT:    movl %ecx, (%eax)
698; X86-NEXT:    addl $60, %esp
699; X86-NEXT:    popl %esi
700; X86-NEXT:    popl %edi
701; X86-NEXT:    popl %ebx
702; X86-NEXT:    popl %ebp
703; X86-NEXT:    retl $4
704  %tmp = call <4 x i32> @llvm.sdiv.fix.v4i32(<4 x i32> %x, <4 x i32> %y, i32 31)
705  ret <4 x i32> %tmp
706}
707