xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll (revision 5d9c717597aef72e4ba27a2b143e9753c513e5c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,CGP %s
4
5; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
6
; Signed 32-bit division with both operands in VGPRs (v0 and v1 in the checks).
; Both RUN lines expect the division to be expanded inline. The operands are
; made non-negative with ashr/add/xor, a float reciprocal of the denominator is
; converted back to an integer estimate and refined with mul_lo/mul_hi, the
; quotient is corrected with two compare/select steps, and the sign is restored
; with xor/sub.
; The two prefixes differ in the reciprocal opcode (v_rcp_iflag_f32 for GISEL,
; v_rcp_f32 for CGP) and in where the xor of the two sign masks is scheduled.
7define i32 @v_sdiv_i32(i32 %num, i32 %den) {
8; GISEL-LABEL: v_sdiv_i32:
9; GISEL:       ; %bb.0:
10; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
12; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
13; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
14; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
15; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
16; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
17; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v1
18; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v1
19; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
20; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
21; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
22; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
23; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
24; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
25; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
26; GISEL-NEXT:    v_mul_lo_u32 v5, v4, v1
27; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
28; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
29; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
30; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
31; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v0, v1
32; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
33; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v4
34; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
35; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
36; GISEL-NEXT:    v_xor_b32_e32 v1, v2, v3
37; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v1
38; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
39; GISEL-NEXT:    s_setpc_b64 s[30:31]
40;
41; CGP-LABEL: v_sdiv_i32:
42; CGP:       ; %bb.0:
43; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
45; CGP-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
46; CGP-NEXT:    v_xor_b32_e32 v4, v2, v3
47; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
48; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
49; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
50; CGP-NEXT:    v_xor_b32_e32 v1, v1, v3
51; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v1
52; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
53; CGP-NEXT:    v_rcp_f32_e32 v2, v2
54; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
55; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
56; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
57; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
58; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
59; CGP-NEXT:    v_mul_hi_u32 v2, v0, v2
60; CGP-NEXT:    v_mul_lo_u32 v3, v2, v1
61; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v2
62; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
63; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
64; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
65; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v1
66; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
67; CGP-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
68; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
69; CGP-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
70; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
71; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
72; CGP-NEXT:    s_setpc_b64 s[30:31]
73  %result = sdiv i32 %num, %den
74  ret i32 %result
75}
76
77; FIXME: This is a workaround for not handling the uniform VGPR case.
; The intrinsic declared here is called by s_sdiv_i32 on its sdiv result, and
; the expected output for that function ends with v_readfirstlane_b32 s0, v0.
78declare i32 @llvm.amdgcn.readfirstlane(i32)
79
; Scalar-input variant of the signed 32-bit division test. The function uses
; the amdgpu_ps calling convention with inreg arguments, which appear as s0 and
; s1 in the checks. The sign handling is expected on scalar instructions
; (s_ashr/s_add/s_xor) while the reciprocal estimate and the quotient
; corrections are expected on vector instructions.
; The sdiv result is passed through llvm.amdgcn.readfirstlane, so the expected
; output ends with v_readfirstlane_b32 into s0.
80define amdgpu_ps i32 @s_sdiv_i32(i32 inreg %num, i32 inreg %den) {
81; GISEL-LABEL: s_sdiv_i32:
82; GISEL:       ; %bb.0:
83; GISEL-NEXT:    s_ashr_i32 s2, s0, 31
84; GISEL-NEXT:    s_ashr_i32 s3, s1, 31
85; GISEL-NEXT:    s_add_i32 s0, s0, s2
86; GISEL-NEXT:    s_add_i32 s1, s1, s3
87; GISEL-NEXT:    s_xor_b32 s0, s0, s2
88; GISEL-NEXT:    s_xor_b32 s4, s1, s3
89; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, s4
90; GISEL-NEXT:    s_sub_i32 s1, 0, s4
91; GISEL-NEXT:    v_rcp_iflag_f32_e32 v0, v0
92; GISEL-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
93; GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
94; GISEL-NEXT:    v_mul_lo_u32 v1, s1, v0
95; GISEL-NEXT:    v_mul_hi_u32 v1, v0, v1
96; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
97; GISEL-NEXT:    v_mul_hi_u32 v0, s0, v0
98; GISEL-NEXT:    v_mul_lo_u32 v1, v0, s4
99; GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
100; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, s0, v1
101; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
102; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
103; GISEL-NEXT:    v_subrev_i32_e64 v2, s[0:1], s4, v1
104; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
105; GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
106; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
107; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
108; GISEL-NEXT:    s_xor_b32 s0, s2, s3
109; GISEL-NEXT:    v_xor_b32_e32 v0, s0, v0
110; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, s0, v0
111; GISEL-NEXT:    v_readfirstlane_b32 s0, v0
112; GISEL-NEXT:    ; return to shader part epilog
113;
114; CGP-LABEL: s_sdiv_i32:
115; CGP:       ; %bb.0:
116; CGP-NEXT:    s_ashr_i32 s2, s0, 31
117; CGP-NEXT:    s_ashr_i32 s3, s1, 31
118; CGP-NEXT:    s_xor_b32 s4, s2, s3
119; CGP-NEXT:    s_add_i32 s0, s0, s2
120; CGP-NEXT:    s_add_i32 s1, s1, s3
121; CGP-NEXT:    s_xor_b32 s0, s0, s2
122; CGP-NEXT:    s_xor_b32 s2, s1, s3
123; CGP-NEXT:    v_cvt_f32_u32_e32 v0, s2
124; CGP-NEXT:    s_sub_i32 s1, 0, s2
125; CGP-NEXT:    v_rcp_f32_e32 v0, v0
126; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
127; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
128; CGP-NEXT:    v_mul_lo_u32 v1, s1, v0
129; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
130; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
131; CGP-NEXT:    v_mul_hi_u32 v0, s0, v0
132; CGP-NEXT:    v_mul_lo_u32 v1, v0, s2
133; CGP-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
134; CGP-NEXT:    v_sub_i32_e32 v1, vcc, s0, v1
135; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s2, v1
136; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
137; CGP-NEXT:    v_subrev_i32_e64 v2, s[0:1], s2, v1
138; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
139; CGP-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
140; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s2, v1
141; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
142; CGP-NEXT:    v_xor_b32_e32 v0, s4, v0
143; CGP-NEXT:    v_subrev_i32_e32 v0, vcc, s4, v0
144; CGP-NEXT:    v_readfirstlane_b32 s0, v0
145; CGP-NEXT:    ; return to shader part epilog
146  %result = sdiv i32 %num, %den
147  %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %result)
148  ret i32 %readlane
149}
150
; Signed division of two <2 x i32> vectors. In the checks the numerator
; elements are in v0 and v1 and the denominator elements are in v2 and v3.
; The expected output contains one full inline expansion per element, with the
; instructions of the two expansions interleaved.
; The two prefixes differ in the reciprocal opcode (v_rcp_iflag_f32 for GISEL,
; v_rcp_f32 for CGP) and in the position of the sign-mask xor instructions.
151define <2 x i32> @v_sdiv_v2i32(<2 x i32> %num, <2 x i32> %den) {
152; GISEL-LABEL: v_sdiv_v2i32:
153; GISEL:       ; %bb.0:
154; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
155; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
156; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
157; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
158; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
159; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
160; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
161; GISEL-NEXT:    v_xor_b32_e32 v8, v4, v5
162; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
163; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
164; GISEL-NEXT:    v_xor_b32_e32 v9, v6, v7
165; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
166; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v5
167; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
168; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
169; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
170; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
171; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v3
172; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
173; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
174; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
175; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
176; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
177; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
178; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
179; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
180; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
181; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
182; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
183; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
184; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
185; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
186; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
187; GISEL-NEXT:    v_mul_lo_u32 v6, v4, v2
188; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
189; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v3
190; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 1, v5
191; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
192; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v10
193; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
194; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
195; GISEL-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
196; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
197; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[4:5]
198; GISEL-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
199; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
200; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
201; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
202; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
203; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
204; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
205; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
206; GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
207; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v8
208; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v9
209; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
210; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
211; GISEL-NEXT:    s_setpc_b64 s[30:31]
212;
213; CGP-LABEL: v_sdiv_v2i32:
214; CGP:       ; %bb.0:
215; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
216; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
217; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
218; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
219; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
220; CGP-NEXT:    v_xor_b32_e32 v8, v4, v5
221; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
222; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
223; CGP-NEXT:    v_xor_b32_e32 v9, v6, v7
224; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
225; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
226; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
227; CGP-NEXT:    v_xor_b32_e32 v2, v2, v5
228; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
229; CGP-NEXT:    v_xor_b32_e32 v3, v3, v7
230; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
231; CGP-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
232; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
233; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
234; CGP-NEXT:    v_rcp_f32_e32 v4, v4
235; CGP-NEXT:    v_rcp_f32_e32 v6, v6
236; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
237; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
238; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
239; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
240; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
241; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
242; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
243; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
244; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
245; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
246; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
247; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
248; CGP-NEXT:    v_mul_lo_u32 v6, v4, v2
249; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
250; CGP-NEXT:    v_mul_lo_u32 v10, v5, v3
251; CGP-NEXT:    v_add_i32_e32 v11, vcc, 1, v5
252; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
253; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v10
254; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
255; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
256; CGP-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
257; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
258; CGP-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[4:5]
259; CGP-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
260; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
261; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
262; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
263; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
264; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
265; CGP-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
266; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
267; CGP-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
268; CGP-NEXT:    v_xor_b32_e32 v0, v0, v8
269; CGP-NEXT:    v_xor_b32_e32 v1, v1, v9
270; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
271; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
272; CGP-NEXT:    s_setpc_b64 s[30:31]
273  %result = sdiv <2 x i32> %num, %den
274  ret <2 x i32> %result
275}
276
; Signed division by the constant 4096. Both RUN lines share one set of
; expectations. No reciprocal expansion is expected here, only a shift
; sequence (ashr by 31, lshr by 20, add to the numerator, then ashr by 12).
277define i32 @v_sdiv_i32_pow2k_denom(i32 %num) {
278; CHECK-LABEL: v_sdiv_i32_pow2k_denom:
279; CHECK:       ; %bb.0:
280; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
281; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
282; CHECK-NEXT:    v_lshrrev_b32_e32 v1, 20, v1
283; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
284; CHECK-NEXT:    v_ashrrev_i32_e32 v0, 12, v0
285; CHECK-NEXT:    s_setpc_b64 s[30:31]
286  %result = sdiv i32 %num, 4096
287  ret i32 %result
288}
289
; <2 x i32> signed division by a splat of the constant 4096. Both RUN lines
; share one set of expectations, which apply the ashr 31 / lshr 20 / add /
; ashr 12 shift sequence to each element (v0 and v1).
290define <2 x i32> @v_sdiv_v2i32_pow2k_denom(<2 x i32> %num) {
291; CHECK-LABEL: v_sdiv_v2i32_pow2k_denom:
292; CHECK:       ; %bb.0:
293; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
294; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
295; CHECK-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
296; CHECK-NEXT:    v_lshrrev_b32_e32 v2, 20, v2
297; CHECK-NEXT:    v_lshrrev_b32_e32 v3, 20, v3
298; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
299; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
300; CHECK-NEXT:    v_ashrrev_i32_e32 v0, 12, v0
301; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 12, v1
302; CHECK-NEXT:    s_setpc_b64 s[30:31]
303  %result = sdiv <2 x i32> %num, <i32 4096, i32 1024>
304  ret <2 x i32> %result
305}
306
; Signed division by the odd constant 1235195 (0x12d8fb). Both RUN lines share
; one set of expectations. The reciprocal-based expansion is still expected,
; with the denominator folded into immediates. 0x12d8fb is the denominator,
; 0xffed2705 is its 32-bit negation, and 0x4996c7d8 (the operand of
; v_rcp_iflag_f32) is the single-precision encoding of 1235195.0.
; Only the numerator needs the ashr/add/xor sign handling in the checks.
307define i32 @v_sdiv_i32_oddk_denom(i32 %num) {
308; CHECK-LABEL: v_sdiv_i32_oddk_denom:
309; CHECK:       ; %bb.0:
310; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
311; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
312; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, 0x4996c7d8
313; CHECK-NEXT:    v_mov_b32_e32 v3, 0xffed2705
314; CHECK-NEXT:    v_mov_b32_e32 v4, 0x12d8fb
315; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
316; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
317; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
318; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
319; CHECK-NEXT:    v_mul_lo_u32 v3, v2, v3
320; CHECK-NEXT:    v_mul_hi_u32 v3, v2, v3
321; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
322; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v2
323; CHECK-NEXT:    v_mul_lo_u32 v3, v2, v4
324; CHECK-NEXT:    v_add_i32_e32 v5, vcc, 1, v2
325; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
326; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v4
327; CHECK-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[4:5]
328; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 0xffed2705, v0
329; CHECK-NEXT:    v_cndmask_b32_e64 v0, v0, v3, s[4:5]
330; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
331; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
332; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
333; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
334; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
335; CHECK-NEXT:    s_setpc_b64 s[30:31]
336  %result = sdiv i32 %num, 1235195
337  ret i32 %result
338}
339
; <2 x i32> signed division by a splat of the odd constant 1235195 (0x12d8fb).
; In both sets of expectations the reciprocal estimate is computed once and
; reused for both elements (a single rcp, then v_mul_hi_u32 against v0 and v1).
; The prefixes differ in how the float denominator is obtained. The GISEL
; checks convert 0x12d8fb with v_cvt_f32_u32 at run time, while the CGP checks
; feed the pre-converted float constant 0x4996c7d8 directly to the rcp.
340define <2 x i32> @v_sdiv_v2i32_oddk_denom(<2 x i32> %num) {
341; GISEL-LABEL: v_sdiv_v2i32_oddk_denom:
342; GISEL:       ; %bb.0:
343; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
344; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
345; GISEL-NEXT:    v_mov_b32_e32 v3, 0x12d8fb
346; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, 0x12d8fb
347; GISEL-NEXT:    v_mov_b32_e32 v5, 0xffed2705
348; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
349; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
350; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
351; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
352; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
353; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
354; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
355; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
356; GISEL-NEXT:    v_mul_lo_u32 v7, v4, v5
357; GISEL-NEXT:    v_mul_hi_u32 v7, v4, v7
358; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
359; GISEL-NEXT:    v_mul_hi_u32 v7, v0, v4
360; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
361; GISEL-NEXT:    v_mul_lo_u32 v8, v7, v3
362; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v7
363; GISEL-NEXT:    v_mul_lo_u32 v10, v4, v3
364; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 1, v4
365; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
366; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v10
367; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
368; GISEL-NEXT:    v_cndmask_b32_e64 v7, v7, v9, s[4:5]
369; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v0, v5
370; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v1, v3
371; GISEL-NEXT:    v_cndmask_b32_e64 v4, v4, v11, s[6:7]
372; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 0xffed2705, v1
373; GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v5, s[4:5]
374; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v7
375; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v8, s[6:7]
376; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v4
377; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
378; GISEL-NEXT:    v_cndmask_b32_e32 v0, v7, v5, vcc
379; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
380; GISEL-NEXT:    v_cndmask_b32_e32 v1, v4, v8, vcc
381; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
382; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
383; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
384; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
385; GISEL-NEXT:    s_setpc_b64 s[30:31]
386;
387; CGP-LABEL: v_sdiv_v2i32_oddk_denom:
388; CGP:       ; %bb.0:
389; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
390; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
391; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, 0x4996c7d8
392; CGP-NEXT:    v_mov_b32_e32 v4, 0xffed2705
393; CGP-NEXT:    v_mov_b32_e32 v5, 0x12d8fb
394; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
395; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
396; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
397; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
398; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
399; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
400; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
401; CGP-NEXT:    v_mul_lo_u32 v7, v3, v4
402; CGP-NEXT:    v_mul_hi_u32 v7, v3, v7
403; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
404; CGP-NEXT:    v_mul_hi_u32 v7, v0, v3
405; CGP-NEXT:    v_mul_hi_u32 v3, v1, v3
406; CGP-NEXT:    v_mul_lo_u32 v8, v7, v5
407; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v7
408; CGP-NEXT:    v_mul_lo_u32 v10, v3, v5
409; CGP-NEXT:    v_add_i32_e32 v11, vcc, 1, v3
410; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
411; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v10
412; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v5
413; CGP-NEXT:    v_cndmask_b32_e64 v7, v7, v9, s[4:5]
414; CGP-NEXT:    v_add_i32_e32 v4, vcc, v0, v4
415; CGP-NEXT:    v_cmp_ge_u32_e64 s[6:7], v1, v5
416; CGP-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s[6:7]
417; CGP-NEXT:    v_add_i32_e32 v8, vcc, 0xffed2705, v1
418; CGP-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
419; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v7
420; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v8, s[6:7]
421; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v3
422; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
423; CGP-NEXT:    v_cndmask_b32_e32 v0, v7, v4, vcc
424; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
425; CGP-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
426; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
427; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
428; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
429; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
430; CGP-NEXT:    s_setpc_b64 s[30:31]
431  %result = sdiv <2 x i32> %num, <i32 1235195, i32 1235195>
432  ret <2 x i32> %result
433}
434
; Signed division where the denominator is 4096 shifted left by a variable
; amount (%y). Both RUN lines share one set of expectations. The checks show
; the shift (v_lshl_b32 with 0x1000) followed by the full reciprocal-based
; expansion, including sign handling for both operands. No shift-based
; lowering of the division itself is expected.
435define i32 @v_sdiv_i32_pow2_shl_denom(i32 %x, i32 %y) {
436; CHECK-LABEL: v_sdiv_i32_pow2_shl_denom:
437; CHECK:       ; %bb.0:
438; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
439; CHECK-NEXT:    v_lshl_b32_e32 v1, 0x1000, v1
440; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
441; CHECK-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
442; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
443; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
444; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
445; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v3
446; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, v1
447; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, 0, v1
448; CHECK-NEXT:    v_rcp_iflag_f32_e32 v4, v4
449; CHECK-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
450; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
451; CHECK-NEXT:    v_mul_lo_u32 v5, v5, v4
452; CHECK-NEXT:    v_mul_hi_u32 v5, v4, v5
453; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
454; CHECK-NEXT:    v_mul_hi_u32 v4, v0, v4
455; CHECK-NEXT:    v_mul_lo_u32 v5, v4, v1
456; CHECK-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
457; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
458; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
459; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
460; CHECK-NEXT:    v_sub_i32_e64 v5, s[4:5], v0, v1
461; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
462; CHECK-NEXT:    v_add_i32_e32 v5, vcc, 1, v4
463; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
464; CHECK-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
465; CHECK-NEXT:    v_xor_b32_e32 v1, v2, v3
466; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
467; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
468; CHECK-NEXT:    s_setpc_b64 s[30:31]
469  %shl.y = shl i32 4096, %y
470  %r = sdiv i32 %x, %shl.y
471  ret i32 %r
472}
473
; <2 x i32> signed division where each denominator element is 4096 shifted
; left by the matching element of %y. The checks show the two shifts
; (v_lshl_b32 with 0x1000 on v2 and v3) followed by one full reciprocal-based
; expansion per element, interleaved.
; The two prefixes differ in the reciprocal opcode (v_rcp_iflag_f32 for GISEL,
; v_rcp_f32 for CGP) and in register assignment for the sign masks.
474define <2 x i32> @v_sdiv_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
475; GISEL-LABEL: v_sdiv_v2i32_pow2_shl_denom:
476; GISEL:       ; %bb.0:
477; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
478; GISEL-NEXT:    v_lshl_b32_e32 v2, 0x1000, v2
479; GISEL-NEXT:    v_lshl_b32_e32 v3, 0x1000, v3
480; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
481; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
482; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
483; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
484; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
485; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
486; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
487; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
488; GISEL-NEXT:    v_xor_b32_e32 v4, v4, v6
489; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
490; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
491; GISEL-NEXT:    v_xor_b32_e32 v5, v5, v7
492; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v6
493; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
494; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v2
495; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
496; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v3
497; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
498; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
499; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
500; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
501; GISEL-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
502; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
503; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
504; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
505; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v8
506; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
507; GISEL-NEXT:    v_mul_hi_u32 v9, v8, v9
508; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
509; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
510; GISEL-NEXT:    v_mul_hi_u32 v6, v0, v6
511; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
512; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v2
513; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v6
514; GISEL-NEXT:    v_mul_lo_u32 v10, v7, v3
515; GISEL-NEXT:    v_add_i32_e32 v11, vcc, 1, v7
516; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
517; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v10
518; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
519; GISEL-NEXT:    v_cndmask_b32_e32 v6, v6, v9, vcc
520; GISEL-NEXT:    v_sub_i32_e64 v8, s[4:5], v0, v2
521; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
522; GISEL-NEXT:    v_cndmask_b32_e64 v7, v7, v11, s[4:5]
523; GISEL-NEXT:    v_sub_i32_e64 v9, s[6:7], v1, v3
524; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
525; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v6
526; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v9, s[4:5]
527; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v7
528; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
529; GISEL-NEXT:    v_cndmask_b32_e32 v0, v6, v8, vcc
530; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
531; GISEL-NEXT:    v_cndmask_b32_e32 v1, v7, v9, vcc
532; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
533; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
534; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
535; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
536; GISEL-NEXT:    s_setpc_b64 s[30:31]
537;
538; CGP-LABEL: v_sdiv_v2i32_pow2_shl_denom:
539; CGP:       ; %bb.0:
540; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
541; CGP-NEXT:    v_lshl_b32_e32 v2, 0x1000, v2
542; CGP-NEXT:    v_lshl_b32_e32 v3, 0x1000, v3
543; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
544; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
545; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
546; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
547; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
548; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
549; CGP-NEXT:    v_xor_b32_e32 v8, v4, v6
550; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
551; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
552; CGP-NEXT:    v_xor_b32_e32 v4, v5, v7
553; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
554; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
555; CGP-NEXT:    v_xor_b32_e32 v2, v2, v6
556; CGP-NEXT:    v_xor_b32_e32 v3, v3, v7
557; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v2
558; CGP-NEXT:    v_sub_i32_e32 v6, vcc, 0, v2
559; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v3
560; CGP-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
561; CGP-NEXT:    v_rcp_f32_e32 v5, v5
562; CGP-NEXT:    v_rcp_f32_e32 v7, v7
563; CGP-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
564; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
565; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
566; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
567; CGP-NEXT:    v_mul_lo_u32 v6, v6, v5
568; CGP-NEXT:    v_mul_lo_u32 v9, v9, v7
569; CGP-NEXT:    v_mul_hi_u32 v6, v5, v6
570; CGP-NEXT:    v_mul_hi_u32 v9, v7, v9
571; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
572; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v9
573; CGP-NEXT:    v_mul_hi_u32 v5, v0, v5
574; CGP-NEXT:    v_mul_hi_u32 v6, v1, v6
575; CGP-NEXT:    v_mul_lo_u32 v7, v5, v2
576; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
577; CGP-NEXT:    v_mul_lo_u32 v10, v6, v3
578; CGP-NEXT:    v_add_i32_e32 v11, vcc, 1, v6
579; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
580; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v10
581; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
582; CGP-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
583; CGP-NEXT:    v_sub_i32_e64 v7, s[4:5], v0, v2
584; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
585; CGP-NEXT:    v_cndmask_b32_e64 v6, v6, v11, s[4:5]
586; CGP-NEXT:    v_sub_i32_e64 v9, s[6:7], v1, v3
587; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
588; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
589; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v9, s[4:5]
590; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v6
591; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
592; CGP-NEXT:    v_cndmask_b32_e32 v0, v5, v7, vcc
593; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
594; CGP-NEXT:    v_cndmask_b32_e32 v1, v6, v9, vcc
595; CGP-NEXT:    v_xor_b32_e32 v0, v0, v8
596; CGP-NEXT:    v_xor_b32_e32 v1, v1, v4
597; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
598; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
599; CGP-NEXT:    s_setpc_b64 s[30:31]
600  %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
601  %r = sdiv <2 x i32> %x, %shl.y
602  ret <2 x i32> %r
603}
604
; Signed division of operands that are first masked to their low 24 bits
; (and with 16777215, shown as 0xffffff in the checks), so both inputs to the
; sdiv have a zero sign bit. The expected output contains no sign handling
; (no ashr/xor before or after the division) but still uses the 32-bit
; reciprocal-based expansion with two quotient corrections.
; The two prefixes differ only in the reciprocal opcode (v_rcp_iflag_f32 for
; GISEL, v_rcp_f32 for CGP).
605define i32 @v_sdiv_i32_24bit(i32 %num, i32 %den) {
606; GISEL-LABEL: v_sdiv_i32_24bit:
607; GISEL:       ; %bb.0:
608; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
609; GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
610; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
611; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, v1
612; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
613; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
614; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
615; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
616; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v2
617; GISEL-NEXT:    v_mul_hi_u32 v3, v2, v3
618; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
619; GISEL-NEXT:    v_mul_hi_u32 v2, v0, v2
620; GISEL-NEXT:    v_mul_lo_u32 v3, v2, v1
621; GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
622; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
623; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
624; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
625; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v1
626; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
627; GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
628; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
629; GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
630; GISEL-NEXT:    s_setpc_b64 s[30:31]
631;
632; CGP-LABEL: v_sdiv_i32_24bit:
633; CGP:       ; %bb.0:
634; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
635; CGP-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
636; CGP-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
637; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v1
638; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
639; CGP-NEXT:    v_rcp_f32_e32 v2, v2
640; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
641; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
642; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
643; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
644; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
645; CGP-NEXT:    v_mul_hi_u32 v2, v0, v2
646; CGP-NEXT:    v_mul_lo_u32 v3, v2, v1
647; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
648; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
649; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
650; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
651; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v1
652; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
653; CGP-NEXT:    v_add_i32_e32 v3, vcc, 1, v2
654; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
655; CGP-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
656; CGP-NEXT:    s_setpc_b64 s[30:31]
657  %num.mask = and i32 %num, 16777215
658  %den.mask = and i32 %den, 16777215
659  %result = sdiv i32 %num.mask, %den.mask
660  ret i32 %result
661}
662
; <2 x i32> version of the 24-bit masked division. All four input elements
; (v0 to v3 in the checks) are masked with 0xffffff before the sdiv. As in the
; scalar test, the expected output has no sign handling and contains one
; reciprocal-based expansion per element, interleaved.
; The two prefixes differ only in the reciprocal opcode (v_rcp_iflag_f32 for
; GISEL, v_rcp_f32 for CGP).
663define <2 x i32> @v_sdiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
664; GISEL-LABEL: v_sdiv_v2i32_24bit:
665; GISEL:       ; %bb.0:
666; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
667; GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
668; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
669; GISEL-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
670; GISEL-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
671; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
672; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
673; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v3
674; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
675; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
676; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
677; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
678; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
679; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
680; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
681; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
682; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
683; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
684; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
685; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
686; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
687; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
688; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
689; GISEL-NEXT:    v_mul_lo_u32 v6, v4, v2
690; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
691; GISEL-NEXT:    v_mul_lo_u32 v8, v5, v3
692; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
693; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
694; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
695; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
696; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
697; GISEL-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
698; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
699; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[4:5]
700; GISEL-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
701; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
702; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
703; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
704; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
705; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
706; GISEL-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
707; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
708; GISEL-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
709; GISEL-NEXT:    s_setpc_b64 s[30:31]
710;
711; CGP-LABEL: v_sdiv_v2i32_24bit:
712; CGP:       ; %bb.0:
713; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
714; CGP-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
715; CGP-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
716; CGP-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
717; CGP-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
718; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
719; CGP-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
720; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
721; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
722; CGP-NEXT:    v_rcp_f32_e32 v4, v4
723; CGP-NEXT:    v_rcp_f32_e32 v6, v6
724; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
725; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
726; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
727; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
728; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
729; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
730; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
731; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
732; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
733; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
734; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
735; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
736; CGP-NEXT:    v_mul_lo_u32 v6, v4, v2
737; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
738; CGP-NEXT:    v_mul_lo_u32 v8, v5, v3
739; CGP-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
740; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
741; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v8
742; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
743; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
744; CGP-NEXT:    v_sub_i32_e64 v6, s[4:5], v0, v2
745; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v3
746; CGP-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[4:5]
747; CGP-NEXT:    v_sub_i32_e64 v7, s[6:7], v1, v3
748; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
749; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
750; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
751; CGP-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
752; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
753; CGP-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc
754; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
755; CGP-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
756; CGP-NEXT:    s_setpc_b64 s[30:31]
757  %num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215>
758  %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215>
759  %result = sdiv <2 x i32> %num.mask, %den.mask
760  ret <2 x i32> %result
761}
762
; Signed division by the constant 4096 carrying the exact flag. Both RUN lines
; share one set of expectations, and the whole division is expected to become
; a single arithmetic shift right by 12 with no rounding adjustment.
763define i32 @v_sdiv_i32_exact(i32 %num) {
764; CHECK-LABEL: v_sdiv_i32_exact:
765; CHECK:       ; %bb.0:
766; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
767; CHECK-NEXT:    v_ashrrev_i32_e32 v0, 12, v0
768; CHECK-NEXT:    s_setpc_b64 s[30:31]
769  %result = sdiv exact i32 %num, 4096
770  ret i32 %result
771}
772
; <2 x i32> exact signed division by the non-splat constant vector
; <4096, 1024>. Both RUN lines share one set of expectations, which use one
; arithmetic shift right per element, by 12 for element 0 and by 10 for
; element 1.
773define <2 x i32> @v_sdiv_v2i32_exact(<2 x i32> %num) {
774; CHECK-LABEL: v_sdiv_v2i32_exact:
775; CHECK:       ; %bb.0:
776; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
777; CHECK-NEXT:    v_ashrrev_i32_e32 v0, 12, v0
778; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 10, v1
779; CHECK-NEXT:    s_setpc_b64 s[30:31]
780  %result = sdiv exact <2 x i32> %num, <i32 4096, i32 1024>
781  ret <2 x i32> %result
782}
783