xref: /llvm-project/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll (revision 7b3bbd83c0c24087072ec5b22a76799ab31f87d5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
3; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
4
5define void @test_udiv7_v2i32(ptr %x, ptr %y) nounwind {
; Unsigned divide-by-7 lowered via multiply-high with magic constant
; 613566757 (0x24924925): t = mulhi_u32(x, m), then
; q = ((x - t) >> 1 + t) >> 2  (psubd / psrld $1 / paddd / psrld $2).
; pmuludq only multiplies the even lanes, so the odd lanes are shuffled
; down, multiplied separately, and the two highs re-interleaved (punpckldq).
6; X64-LABEL: test_udiv7_v2i32:
7; X64:       # %bb.0:
8; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
9; X64-NEXT:    movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
10; X64-NEXT:    movdqa %xmm0, %xmm2
11; X64-NEXT:    pmuludq %xmm1, %xmm2
12; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
13; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
14; X64-NEXT:    pmuludq %xmm1, %xmm3
15; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
16; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
17; X64-NEXT:    psubd %xmm2, %xmm0
18; X64-NEXT:    psrld $1, %xmm0
19; X64-NEXT:    paddd %xmm2, %xmm0
20; X64-NEXT:    psrld $2, %xmm0
21; X64-NEXT:    movq %xmm0, (%rsi)
22; X64-NEXT:    retq
23;
24; X86-LABEL: test_udiv7_v2i32:
25; X86:       # %bb.0:
26; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
27; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
28; X86-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
29; X86-NEXT:    movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
30; X86-NEXT:    movdqa %xmm0, %xmm2
31; X86-NEXT:    pmuludq %xmm1, %xmm2
32; X86-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
33; X86-NEXT:    movdqa %xmm0, %xmm3
34; X86-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,1,1]
35; X86-NEXT:    pmuludq %xmm1, %xmm3
36; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
37; X86-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
38; X86-NEXT:    psubd %xmm2, %xmm0
39; X86-NEXT:    psrld $1, %xmm0
40; X86-NEXT:    paddd %xmm2, %xmm0
41; X86-NEXT:    psrld $2, %xmm0
42; X86-NEXT:    movq %xmm0, (%eax)
43; X86-NEXT:    retl
44  %a = load <2 x i32>, ptr %x
45  %b = udiv <2 x i32> %a, <i32 7, i32 7>
46  store <2 x i32> %b, ptr %y
47  ret void
48}
49
50define void @test_urem7_v2i32(ptr %x, ptr %y) nounwind {
; Unsigned remainder-by-7: first computes the udiv-by-7 quotient q via the
; multiply-high magic 613566757 sequence, then r = x - q*7, where q*7 is
; formed multiplier-free as (q << 3) - q (pslld $3 / psubd) before the
; final paddd with the original x.
51; X64-LABEL: test_urem7_v2i32:
52; X64:       # %bb.0:
53; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
54; X64-NEXT:    movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
55; X64-NEXT:    movdqa %xmm0, %xmm2
56; X64-NEXT:    pmuludq %xmm1, %xmm2
57; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
58; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
59; X64-NEXT:    pmuludq %xmm1, %xmm3
60; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
61; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
62; X64-NEXT:    movdqa %xmm0, %xmm1
63; X64-NEXT:    psubd %xmm2, %xmm1
64; X64-NEXT:    psrld $1, %xmm1
65; X64-NEXT:    paddd %xmm2, %xmm1
66; X64-NEXT:    psrld $2, %xmm1
67; X64-NEXT:    movdqa %xmm1, %xmm2
68; X64-NEXT:    pslld $3, %xmm2
69; X64-NEXT:    psubd %xmm2, %xmm1
70; X64-NEXT:    paddd %xmm0, %xmm1
71; X64-NEXT:    movq %xmm1, (%rsi)
72; X64-NEXT:    retq
73;
74; X86-LABEL: test_urem7_v2i32:
75; X86:       # %bb.0:
76; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
77; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
78; X86-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
79; X86-NEXT:    movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
80; X86-NEXT:    movdqa %xmm0, %xmm2
81; X86-NEXT:    pmuludq %xmm1, %xmm2
82; X86-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
83; X86-NEXT:    movdqa %xmm0, %xmm3
84; X86-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,1,1]
85; X86-NEXT:    pmuludq %xmm1, %xmm3
86; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
87; X86-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
88; X86-NEXT:    movdqa %xmm0, %xmm1
89; X86-NEXT:    psubd %xmm2, %xmm1
90; X86-NEXT:    psrld $1, %xmm1
91; X86-NEXT:    paddd %xmm2, %xmm1
92; X86-NEXT:    psrld $2, %xmm1
93; X86-NEXT:    movdqa %xmm1, %xmm2
94; X86-NEXT:    pslld $3, %xmm2
95; X86-NEXT:    psubd %xmm2, %xmm1
96; X86-NEXT:    paddd %xmm0, %xmm1
97; X86-NEXT:    movq %xmm1, (%eax)
98; X86-NEXT:    retl
99  %a = load <2 x i32>, ptr %x
100  %b = urem <2 x i32> %a, <i32 7, i32 7>
101  store <2 x i32> %b, ptr %y
102  ret void
103}
104
105define void @test_sdiv7_v2i32(ptr %x, ptr %y) nounwind {
; Signed divide-by-7 via multiply-high with magic 2454267027 (0x92492493).
; pmuludq is an unsigned multiply, so the signed high half is recovered by
; subtracting the multiplier for negative inputs (pcmpgtd/pand/paddd/psubd),
; then q = (t >>s 2) + (t >>u 31)  — the psrld $31 adds 1 for negative t to
; round the quotient toward zero.
106; X64-LABEL: test_sdiv7_v2i32:
107; X64:       # %bb.0:
108; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
109; X64-NEXT:    movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
110; X64-NEXT:    movdqa %xmm0, %xmm2
111; X64-NEXT:    pmuludq %xmm1, %xmm2
112; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
113; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
114; X64-NEXT:    pmuludq %xmm1, %xmm3
115; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
116; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
117; X64-NEXT:    pxor %xmm3, %xmm3
118; X64-NEXT:    pcmpgtd %xmm0, %xmm3
119; X64-NEXT:    pand %xmm1, %xmm3
120; X64-NEXT:    paddd %xmm0, %xmm3
121; X64-NEXT:    psubd %xmm3, %xmm2
122; X64-NEXT:    paddd %xmm0, %xmm2
123; X64-NEXT:    movdqa %xmm2, %xmm0
124; X64-NEXT:    psrld $31, %xmm0
125; X64-NEXT:    psrad $2, %xmm2
126; X64-NEXT:    paddd %xmm0, %xmm2
127; X64-NEXT:    movq %xmm2, (%rsi)
128; X64-NEXT:    retq
129;
130; X86-LABEL: test_sdiv7_v2i32:
131; X86:       # %bb.0:
132; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
133; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
134; X86-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
135; X86-NEXT:    movdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
136; X86-NEXT:    movdqa %xmm1, %xmm0
137; X86-NEXT:    pmuludq %xmm2, %xmm0
138; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
139; X86-NEXT:    movdqa %xmm1, %xmm3
140; X86-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,1,1]
141; X86-NEXT:    pmuludq %xmm2, %xmm3
142; X86-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
143; X86-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
144; X86-NEXT:    pxor %xmm3, %xmm3
145; X86-NEXT:    pcmpgtd %xmm1, %xmm3
146; X86-NEXT:    pand %xmm2, %xmm3
147; X86-NEXT:    paddd %xmm1, %xmm3
148; X86-NEXT:    psubd %xmm3, %xmm0
149; X86-NEXT:    paddd %xmm1, %xmm0
150; X86-NEXT:    movdqa %xmm0, %xmm1
151; X86-NEXT:    psrld $31, %xmm1
152; X86-NEXT:    psrad $2, %xmm0
153; X86-NEXT:    paddd %xmm1, %xmm0
154; X86-NEXT:    movq %xmm0, (%eax)
155; X86-NEXT:    retl
156  %a = load <2 x i32>, ptr %x
157  %b = sdiv <2 x i32> %a, <i32 7, i32 7>
158  store <2 x i32> %b, ptr %y
159  ret void
160}
161
162define void @test_srem7_v2i32(ptr %x, ptr %y) nounwind {
; Signed remainder-by-7: computes the sdiv-by-7 quotient q with the magic
; 2454267027 multiply-high sequence (including the unsigned-multiply fixup
; and the psrld $31 round-toward-zero correction), then r = x - q*7 with
; q*7 formed as (q << 3) - q (pslld $3 / psubd) before the final paddd.
163; X64-LABEL: test_srem7_v2i32:
164; X64:       # %bb.0:
165; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
166; X64-NEXT:    movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
167; X64-NEXT:    movdqa %xmm0, %xmm2
168; X64-NEXT:    pmuludq %xmm1, %xmm2
169; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
170; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
171; X64-NEXT:    pmuludq %xmm1, %xmm3
172; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
173; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
174; X64-NEXT:    pxor %xmm3, %xmm3
175; X64-NEXT:    pcmpgtd %xmm0, %xmm3
176; X64-NEXT:    pand %xmm1, %xmm3
177; X64-NEXT:    paddd %xmm0, %xmm3
178; X64-NEXT:    psubd %xmm3, %xmm2
179; X64-NEXT:    paddd %xmm0, %xmm2
180; X64-NEXT:    movdqa %xmm2, %xmm1
181; X64-NEXT:    psrld $31, %xmm1
182; X64-NEXT:    psrad $2, %xmm2
183; X64-NEXT:    paddd %xmm1, %xmm2
184; X64-NEXT:    movdqa %xmm2, %xmm1
185; X64-NEXT:    pslld $3, %xmm1
186; X64-NEXT:    psubd %xmm1, %xmm2
187; X64-NEXT:    paddd %xmm0, %xmm2
188; X64-NEXT:    movq %xmm2, (%rsi)
189; X64-NEXT:    retq
190;
191; X86-LABEL: test_srem7_v2i32:
192; X86:       # %bb.0:
193; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
194; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
195; X86-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
196; X86-NEXT:    movdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
197; X86-NEXT:    movdqa %xmm0, %xmm1
198; X86-NEXT:    pmuludq %xmm2, %xmm1
199; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
200; X86-NEXT:    movdqa %xmm0, %xmm3
201; X86-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,1,1]
202; X86-NEXT:    pmuludq %xmm2, %xmm3
203; X86-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
204; X86-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
205; X86-NEXT:    pxor %xmm3, %xmm3
206; X86-NEXT:    pcmpgtd %xmm0, %xmm3
207; X86-NEXT:    pand %xmm2, %xmm3
208; X86-NEXT:    paddd %xmm0, %xmm3
209; X86-NEXT:    psubd %xmm3, %xmm1
210; X86-NEXT:    paddd %xmm0, %xmm1
211; X86-NEXT:    movdqa %xmm1, %xmm2
212; X86-NEXT:    psrld $31, %xmm2
213; X86-NEXT:    psrad $2, %xmm1
214; X86-NEXT:    paddd %xmm2, %xmm1
215; X86-NEXT:    movdqa %xmm1, %xmm2
216; X86-NEXT:    pslld $3, %xmm2
217; X86-NEXT:    psubd %xmm2, %xmm1
218; X86-NEXT:    paddd %xmm0, %xmm1
219; X86-NEXT:    movq %xmm1, (%eax)
220; X86-NEXT:    retl
221  %a = load <2 x i32>, ptr %x
222  %b = srem <2 x i32> %a, <i32 7, i32 7>
223  store <2 x i32> %b, ptr %y
224  ret void
225}
226
227define void @test_udiv_pow2_v2i32(ptr %x, ptr %y) nounwind {
; Unsigned divide by a power of two folds to a single vector logical
; right shift: udiv by 8 == psrld $3 on both targets.
228; X64-LABEL: test_udiv_pow2_v2i32:
229; X64:       # %bb.0:
230; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
231; X64-NEXT:    psrld $3, %xmm0
232; X64-NEXT:    movq %xmm0, (%rsi)
233; X64-NEXT:    retq
234;
235; X86-LABEL: test_udiv_pow2_v2i32:
236; X86:       # %bb.0:
237; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
238; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
239; X86-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
240; X86-NEXT:    psrld $3, %xmm0
241; X86-NEXT:    movq %xmm0, (%eax)
242; X86-NEXT:    retl
243  %a = load <2 x i32>, ptr %x
244  %b = udiv <2 x i32> %a, <i32 8, i32 8>
245  store <2 x i32> %b, ptr %y
246  ret void
247}
248
249define void @test_urem_pow2_v2i32(ptr %x, ptr %y) nounwind {
; Unsigned remainder by 8 folds to a mask with 7 in each lane. X64 does it
; entirely in GPRs: one 64-bit andq with 0x700000007 covers both i32 lanes.
; X86 masks in an XMM register via a constant-pool andps.
250; X64-LABEL: test_urem_pow2_v2i32:
251; X64:       # %bb.0:
252; X64-NEXT:    movabsq $30064771079, %rax # imm = 0x700000007
253; X64-NEXT:    andq (%rdi), %rax
254; X64-NEXT:    movq %rax, (%rsi)
255; X64-NEXT:    retq
256;
257; X86-LABEL: test_urem_pow2_v2i32:
258; X86:       # %bb.0:
259; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
260; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
261; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
262; X86-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
263; X86-NEXT:    movlps %xmm0, (%eax)
264; X86-NEXT:    retl
265  %a = load <2 x i32>, ptr %x
266  %b = urem <2 x i32> %a, <i32 8, i32 8>
267  store <2 x i32> %b, ptr %y
268  ret void
269}
270
271define void @test_sdiv_pow2_v2i32(ptr %x, ptr %y) nounwind {
; Signed divide by 8: bias negative lanes before the arithmetic shift so the
; result rounds toward zero. (x >>s 31) >>u 29 yields 7 for negative lanes
; and 0 otherwise; add it, then psrad $3.
272; X64-LABEL: test_sdiv_pow2_v2i32:
273; X64:       # %bb.0:
274; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
275; X64-NEXT:    movdqa %xmm0, %xmm1
276; X64-NEXT:    psrad $31, %xmm1
277; X64-NEXT:    psrld $29, %xmm1
278; X64-NEXT:    paddd %xmm0, %xmm1
279; X64-NEXT:    psrad $3, %xmm1
280; X64-NEXT:    movq %xmm1, (%rsi)
281; X64-NEXT:    retq
282;
283; X86-LABEL: test_sdiv_pow2_v2i32:
284; X86:       # %bb.0:
285; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
286; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
287; X86-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
288; X86-NEXT:    movdqa %xmm0, %xmm1
289; X86-NEXT:    psrad $31, %xmm1
290; X86-NEXT:    psrld $29, %xmm1
291; X86-NEXT:    paddd %xmm0, %xmm1
292; X86-NEXT:    psrad $3, %xmm1
293; X86-NEXT:    movq %xmm1, (%eax)
294; X86-NEXT:    retl
295  %a = load <2 x i32>, ptr %x
296  %b = sdiv <2 x i32> %a, <i32 8, i32 8>
297  store <2 x i32> %b, ptr %y
298  ret void
299}
300
301define void @test_srem_pow2_v2i32(ptr %x, ptr %y) nounwind {
; NOTE(review): despite the function name, the IR body below performs
; *udiv* by 8 (and the CHECK lines match a plain psrld $3, identical to
; test_udiv_pow2_v2i32) — presumably a copy-paste from that test, which
; would leave srem-by-power-of-2 uncovered here. TODO: confirm intent and,
; if so, change the body to srem and regenerate the checks with
; utils/update_llc_test_checks.py.
302; X64-LABEL: test_srem_pow2_v2i32:
303; X64:       # %bb.0:
304; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
305; X64-NEXT:    psrld $3, %xmm0
306; X64-NEXT:    movq %xmm0, (%rsi)
307; X64-NEXT:    retq
308;
309; X86-LABEL: test_srem_pow2_v2i32:
310; X86:       # %bb.0:
311; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
312; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
313; X86-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
314; X86-NEXT:    psrld $3, %xmm0
315; X86-NEXT:    movq %xmm0, (%eax)
316; X86-NEXT:    retl
317  %a = load <2 x i32>, ptr %x
318  %b = udiv <2 x i32> %a, <i32 8, i32 8>
319  store <2 x i32> %b, ptr %y
320  ret void
321}
322
323define void @test_udiv_v2i32(ptr %x, ptr %y, ptr %z) nounwind {
; Variable-divisor udiv has no vector instruction on x86, so it is
; scalarized: each lane is extracted (movd/pshufd/shufps), %edx is zeroed
; (divl divides %edx:%eax), divl produces the quotient in %eax, and the two
; quotients are rebuilt into a vector with punpckldq. %rdx is stashed in
; %rcx up front on X64 because divl clobbers it.
324; X64-LABEL: test_udiv_v2i32:
325; X64:       # %bb.0:
326; X64-NEXT:    movq %rdx, %rcx
327; X64-NEXT:    movq (%rdi), %rax
328; X64-NEXT:    movq %rax, %xmm0
329; X64-NEXT:    movq (%rsi), %rsi
330; X64-NEXT:    movq %rsi, %xmm1
331; X64-NEXT:    # kill: def $eax killed $eax killed $rax
332; X64-NEXT:    xorl %edx, %edx
333; X64-NEXT:    divl %esi
334; X64-NEXT:    movd %eax, %xmm2
335; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
336; X64-NEXT:    movd %xmm0, %eax
337; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
338; X64-NEXT:    movd %xmm0, %esi
339; X64-NEXT:    xorl %edx, %edx
340; X64-NEXT:    divl %esi
341; X64-NEXT:    movd %eax, %xmm0
342; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
343; X64-NEXT:    movq %xmm2, (%rcx)
344; X64-NEXT:    retq
345;
346; X86-LABEL: test_udiv_v2i32:
347; X86:       # %bb.0:
348; X86-NEXT:    pushl %esi
349; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
350; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
351; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
352; X86-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
353; X86-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
354; X86-NEXT:    movd %xmm0, %eax
355; X86-NEXT:    movd %xmm1, %esi
356; X86-NEXT:    xorl %edx, %edx
357; X86-NEXT:    divl %esi
358; X86-NEXT:    movd %eax, %xmm2
359; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
360; X86-NEXT:    movd %xmm0, %eax
361; X86-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
362; X86-NEXT:    movd %xmm1, %esi
363; X86-NEXT:    xorl %edx, %edx
364; X86-NEXT:    divl %esi
365; X86-NEXT:    movd %eax, %xmm0
366; X86-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
367; X86-NEXT:    movq %xmm2, (%ecx)
368; X86-NEXT:    popl %esi
369; X86-NEXT:    retl
370  %a = load <2 x i32>, ptr %x
371  %b = load <2 x i32>, ptr %y
372  %c = udiv <2 x i32> %a, %b
373  store <2 x i32> %c, ptr %z
374  ret void
375}
376
377define void @test_urem_v2i32(ptr %x, ptr %y, ptr %z) nounwind {
; Variable-divisor urem, scalarized like the udiv test; the only codegen
; difference is that the remainder is taken from %edx after each divl
; (movd %edx, ...) instead of the quotient from %eax.
378; X64-LABEL: test_urem_v2i32:
379; X64:       # %bb.0:
380; X64-NEXT:    movq %rdx, %rcx
381; X64-NEXT:    movq (%rdi), %rax
382; X64-NEXT:    movq %rax, %xmm0
383; X64-NEXT:    movq (%rsi), %rsi
384; X64-NEXT:    movq %rsi, %xmm1
385; X64-NEXT:    # kill: def $eax killed $eax killed $rax
386; X64-NEXT:    xorl %edx, %edx
387; X64-NEXT:    divl %esi
388; X64-NEXT:    movd %edx, %xmm2
389; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
390; X64-NEXT:    movd %xmm0, %eax
391; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
392; X64-NEXT:    movd %xmm0, %esi
393; X64-NEXT:    xorl %edx, %edx
394; X64-NEXT:    divl %esi
395; X64-NEXT:    movd %edx, %xmm0
396; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
397; X64-NEXT:    movq %xmm2, (%rcx)
398; X64-NEXT:    retq
399;
400; X86-LABEL: test_urem_v2i32:
401; X86:       # %bb.0:
402; X86-NEXT:    pushl %esi
403; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
404; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
405; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
406; X86-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
407; X86-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
408; X86-NEXT:    movd %xmm0, %eax
409; X86-NEXT:    movd %xmm1, %esi
410; X86-NEXT:    xorl %edx, %edx
411; X86-NEXT:    divl %esi
412; X86-NEXT:    movd %edx, %xmm2
413; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
414; X86-NEXT:    movd %xmm0, %eax
415; X86-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
416; X86-NEXT:    movd %xmm1, %esi
417; X86-NEXT:    xorl %edx, %edx
418; X86-NEXT:    divl %esi
419; X86-NEXT:    movd %edx, %xmm0
420; X86-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
421; X86-NEXT:    movq %xmm2, (%ecx)
422; X86-NEXT:    popl %esi
423; X86-NEXT:    retl
424  %a = load <2 x i32>, ptr %x
425  %b = load <2 x i32>, ptr %y
426  %c = urem <2 x i32> %a, %b
427  store <2 x i32> %c, ptr %z
428  ret void
429}
430
431define void @test_sdiv_v2i32(ptr %x, ptr %y, ptr %z) nounwind {
; Variable-divisor sdiv, scalarized: each lane uses cltd (sign-extend
; %eax into %edx:%eax) followed by idivl; the quotient is taken from %eax
; and the lanes are re-packed with punpckldq.
432; X64-LABEL: test_sdiv_v2i32:
433; X64:       # %bb.0:
434; X64-NEXT:    movq %rdx, %rcx
435; X64-NEXT:    movq (%rdi), %rax
436; X64-NEXT:    movq %rax, %xmm0
437; X64-NEXT:    movq (%rsi), %rsi
438; X64-NEXT:    movq %rsi, %xmm1
439; X64-NEXT:    # kill: def $eax killed $eax killed $rax
440; X64-NEXT:    cltd
441; X64-NEXT:    idivl %esi
442; X64-NEXT:    movd %eax, %xmm2
443; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
444; X64-NEXT:    movd %xmm0, %eax
445; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
446; X64-NEXT:    movd %xmm0, %esi
447; X64-NEXT:    cltd
448; X64-NEXT:    idivl %esi
449; X64-NEXT:    movd %eax, %xmm0
450; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
451; X64-NEXT:    movq %xmm2, (%rcx)
452; X64-NEXT:    retq
453;
454; X86-LABEL: test_sdiv_v2i32:
455; X86:       # %bb.0:
456; X86-NEXT:    pushl %esi
457; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
458; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
459; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
460; X86-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
461; X86-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
462; X86-NEXT:    movd %xmm0, %eax
463; X86-NEXT:    movd %xmm1, %esi
464; X86-NEXT:    cltd
465; X86-NEXT:    idivl %esi
466; X86-NEXT:    movd %eax, %xmm2
467; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
468; X86-NEXT:    movd %xmm0, %eax
469; X86-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
470; X86-NEXT:    movd %xmm1, %esi
471; X86-NEXT:    cltd
472; X86-NEXT:    idivl %esi
473; X86-NEXT:    movd %eax, %xmm0
474; X86-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
475; X86-NEXT:    movq %xmm2, (%ecx)
476; X86-NEXT:    popl %esi
477; X86-NEXT:    retl
478  %a = load <2 x i32>, ptr %x
479  %b = load <2 x i32>, ptr %y
480  %c = sdiv <2 x i32> %a, %b
481  store <2 x i32> %c, ptr %z
482  ret void
483}
484
485define void @test_srem_v2i32(ptr %x, ptr %y, ptr %z) nounwind {
; Variable-divisor srem, scalarized: each lane uses cltd sign-extension +
; idivl, with the remainder read from %edx (the sdiv test reads the
; quotient from %eax) and the lanes re-packed with punpckldq.
; NOTE(review): the body previously computed sdiv despite the function name
; (copy-paste from test_sdiv_v2i32), leaving variable srem untested. Fixed
; to srem; the CHECK lines change only movd %eax -> movd %edx after each
; idivl, mirroring the existing udiv/urem pair above. TODO: re-run
; utils/update_llc_test_checks.py to confirm the regenerated checks.
486; X64-LABEL: test_srem_v2i32:
487; X64:       # %bb.0:
488; X64-NEXT:    movq %rdx, %rcx
489; X64-NEXT:    movq (%rdi), %rax
490; X64-NEXT:    movq %rax, %xmm0
491; X64-NEXT:    movq (%rsi), %rsi
492; X64-NEXT:    movq %rsi, %xmm1
493; X64-NEXT:    # kill: def $eax killed $eax killed $rax
494; X64-NEXT:    cltd
495; X64-NEXT:    idivl %esi
496; X64-NEXT:    movd %edx, %xmm2
497; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
498; X64-NEXT:    movd %xmm0, %eax
499; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
500; X64-NEXT:    movd %xmm0, %esi
501; X64-NEXT:    cltd
502; X64-NEXT:    idivl %esi
503; X64-NEXT:    movd %edx, %xmm0
504; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
505; X64-NEXT:    movq %xmm2, (%rcx)
506; X64-NEXT:    retq
507;
508; X86-LABEL: test_srem_v2i32:
509; X86:       # %bb.0:
510; X86-NEXT:    pushl %esi
511; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
512; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
513; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
514; X86-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
515; X86-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
516; X86-NEXT:    movd %xmm0, %eax
517; X86-NEXT:    movd %xmm1, %esi
518; X86-NEXT:    cltd
519; X86-NEXT:    idivl %esi
520; X86-NEXT:    movd %edx, %xmm2
521; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
522; X86-NEXT:    movd %xmm0, %eax
523; X86-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
524; X86-NEXT:    movd %xmm1, %esi
525; X86-NEXT:    cltd
526; X86-NEXT:    idivl %esi
527; X86-NEXT:    movd %edx, %xmm0
528; X86-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
529; X86-NEXT:    movq %xmm2, (%ecx)
530; X86-NEXT:    popl %esi
531; X86-NEXT:    retl
532  %a = load <2 x i32>, ptr %x
533  %b = load <2 x i32>, ptr %y
534  %c = srem <2 x i32> %a, %b
535  store <2 x i32> %c, ptr %z
536  ret void
537}
538