; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64

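; Exact division by an odd constant needs no shift: 25 is inverted modulo 2^32
; and the division becomes a single multiply (0xC28F5C29 * 25 == 1 (mod 2^32)).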
define i32 @test1(i32 %x) {
; X86-LABEL: test1:
; X86:       # %bb.0:
; X86-NEXT:    imull $-1030792151, {{[0-9]+}}(%esp), %eax # imm = 0xC28F5C29
; X86-NEXT:    retl
;
; X64-LABEL: test1:
; X64:       # %bb.0:
; X64-NEXT:    imull $-1030792151, %edi, %eax # imm = 0xC28F5C29
; X64-NEXT:    retq
  %div = sdiv exact i32 %x, 25
  ret i32 %div
}

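; 24 = 8 * 3, so the exact division becomes an arithmetic shift right by 3
; followed by a multiply by the inverse of 3 modulo 2^32 (0xAAAAAAAB).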
define i32 @test2(i32 %x) {
; X86-LABEL: test2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    sarl $3, %eax
; X86-NEXT:    imull $-1431655765, %eax, %eax # imm = 0xAAAAAAAB
; X86-NEXT:    retl
;
; X64-LABEL: test2:
; X64:       # %bb.0:
; X64-NEXT:    sarl $3, %edi
; X64-NEXT:    imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; X64-NEXT:    retq
  %div = sdiv exact i32 %x, 24
  ret i32 %div
}

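; Vector form of the division by 24: shift each lane right by 3, then multiply
; by the splatted inverse of 3 (2863311531 == 0xAAAAAAAB).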
define <4 x i32> @test3(<4 x i32> %x) {
; X86-LABEL: test3:
; X86:       # %bb.0:
; X86-NEXT:    psrad $3, %xmm0
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
; X86-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X86-NEXT:    pmuludq %xmm1, %xmm0
; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT:    pmuludq %xmm1, %xmm2
; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; X86-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT:    retl
;
; X64-LABEL: test3:
; X64:       # %bb.0:
; X64-NEXT:    vpsrad $3, %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
; X64-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %div = sdiv exact <4 x i32> %x, <i32 24, i32 24, i32 24, i32 24>
  ret <4 x i32> %div
}

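; Vector division by the odd constant 25: no shift, just a multiply by the
; splatted inverse of 25 (3264175145 == 0xC28F5C29).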
define <4 x i32> @test4(<4 x i32> %x) {
; X86-LABEL: test4:
; X86:       # %bb.0:
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
; X86-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X86-NEXT:    pmuludq %xmm1, %xmm0
; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT:    pmuludq %xmm1, %xmm2
; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; X86-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT:    retl
;
; X64-LABEL: test4:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
; X64-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %div = sdiv exact <4 x i32> %x, <i32 25, i32 25, i32 25, i32 25>
  ret <4 x i32> %div
}

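; Non-uniform divisors <24, 24, 25, 25>: per-lane shift amounts <3, 3, 0, 0>,
; followed by a multiply with the per-lane inverses of <3, 3, 25, 25>.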
define <4 x i32> @test5(<4 x i32> %x) {
; X86-LABEL: test5:
; X86:       # %bb.0:
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psrad $3, %xmm1
; X86-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; X86-NEXT:    movdqa {{.*#+}} xmm0 = [2863311531,2863311531,3264175145,3264175145]
; X86-NEXT:    pmuludq %xmm1, %xmm0
; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT:    retl
;
; X64-LABEL: test5:
; X64:       # %bb.0:
; X64-NEXT:    vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    retq
  %div = sdiv exact <4 x i32> %x, <i32 24, i32 24, i32 25, i32 25>
  ret <4 x i32> %div
}

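; Non-uniform divisors <24, 24, 26, 26>: per-lane shift amounts <3, 3, 1, 1>,
; followed by a multiply with the per-lane inverses of <3, 3, 13, 13>.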
define <4 x i32> @test6(<4 x i32> %x) {
; X86-LABEL: test6:
; X86:       # %bb.0:
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psrad $3, %xmm1
; X86-NEXT:    psrad $1, %xmm0
; X86-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [2863311531,2863311531,3303820997,3303820997]
; X86-NEXT:    pmuludq %xmm0, %xmm1
; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-NEXT:    movdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test6:
; X64:       # %bb.0:
; X64-NEXT:    vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    retq
  %div = sdiv exact <4 x i32> %x, <i32 24, i32 24, i32 26, i32 26>
  ret <4 x i32> %div
}

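; Both divisors are odd, so <25, 25, 27, 27> needs no shift at all; the lanes
; are simply multiplied by the per-lane inverses of 25 and 27.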
define <4 x i32> @test7(<4 x i32> %x) {
; X86-LABEL: test7:
; X86:       # %bb.0:
; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; X86-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT:    retl
;
; X64-LABEL: test7:
; X64:       # %bb.0:
; X64-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    retq
  %div = sdiv exact <4 x i32> %x, <i32 25, i32 25, i32 27, i32 27>
  ret <4 x i32> %div
}

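; Divisor 1 leaves its lanes untouched (shift by 0, multiply by 1), while the
; lanes divided by 24 still get the shift-by-3 plus multiply-by-inverse-of-3.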
define <4 x i32> @test8(<4 x i32> %x) {
; X86-LABEL: test8:
; X86:       # %bb.0:
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psrad $3, %xmm1
; X86-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; X86-NEXT:    movdqa {{.*#+}} xmm0 = [1,1,2863311531,2863311531]
; X86-NEXT:    pmuludq %xmm1, %xmm0
; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT:    retl
;
; X64-LABEL: test8:
; X64:       # %bb.0:
; X64-NEXT:    vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    retq
  %div = sdiv exact <4 x i32> %x, <i32 1, i32 1, i32 24, i32 24>
  ret <4 x i32> %div
}