xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll (revision 5d9c717597aef72e4ba27a2b143e9753c513e5c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,CGP %s
4
5; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
6
; Scalar i32 srem where both operands are plain function arguments (no
; constant operand). The GISEL and CGP expectations below are the same
; instruction sequence except for the reciprocal: v_rcp_iflag_f32 under GISEL,
; v_rcp_f32 under CGP.
7define i32 @v_srem_i32(i32 %num, i32 %den) {
8; GISEL-LABEL: v_srem_i32:
9; GISEL:       ; %bb.0:
10; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
12; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
13; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
14; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
15; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
16; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
17; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, v1
18; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
19; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
20; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
21; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
22; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v3
23; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
24; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
25; GISEL-NEXT:    v_mul_hi_u32 v3, v0, v3
26; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v1
27; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
28; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
29; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
30; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
31; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
32; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
33; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
34; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
35; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
36; GISEL-NEXT:    s_setpc_b64 s[30:31]
37;
38; CGP-LABEL: v_srem_i32:
39; CGP:       ; %bb.0:
40; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
42; CGP-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
43; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
44; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
45; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
46; CGP-NEXT:    v_xor_b32_e32 v1, v1, v3
47; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v1
48; CGP-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
49; CGP-NEXT:    v_rcp_f32_e32 v3, v3
50; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
51; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
52; CGP-NEXT:    v_mul_lo_u32 v4, v4, v3
53; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
54; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
55; CGP-NEXT:    v_mul_hi_u32 v3, v0, v3
56; CGP-NEXT:    v_mul_lo_u32 v3, v3, v1
57; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
58; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
59; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
60; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
61; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
62; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
63; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
64; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
65; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
66; CGP-NEXT:    s_setpc_b64 s[30:31]
67  %result = srem i32 %num, %den
68  ret i32 %result
69}
70
71; FIXME: This is a workaround for not handling the uniform VGPR case.
; The only use visible in this file is in @s_srem_i32, which applies the
; intrinsic to the srem result before returning it.
72declare i32 @llvm.amdgcn.readfirstlane(i32)
73
; amdgpu_ps variant with both operands marked inreg; the expected code reads
; them from s0/s1. The srem result is passed through
; llvm.amdgcn.readfirstlane before being returned, which shows up as
; v_readfirstlane_b32 at the end of both check sequences. GISEL and CGP differ
; only in the reciprocal instruction (v_rcp_iflag_f32 vs. v_rcp_f32).
74define amdgpu_ps i32 @s_srem_i32(i32 inreg %num, i32 inreg %den) {
75; GISEL-LABEL: s_srem_i32:
76; GISEL:       ; %bb.0:
77; GISEL-NEXT:    s_ashr_i32 s2, s0, 31
78; GISEL-NEXT:    s_ashr_i32 s3, s1, 31
79; GISEL-NEXT:    s_add_i32 s0, s0, s2
80; GISEL-NEXT:    s_add_i32 s1, s1, s3
81; GISEL-NEXT:    s_xor_b32 s0, s0, s2
82; GISEL-NEXT:    s_xor_b32 s1, s1, s3
83; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, s1
84; GISEL-NEXT:    s_sub_i32 s3, 0, s1
85; GISEL-NEXT:    v_rcp_iflag_f32_e32 v0, v0
86; GISEL-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
87; GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
88; GISEL-NEXT:    v_mul_lo_u32 v1, s3, v0
89; GISEL-NEXT:    v_mul_hi_u32 v1, v0, v1
90; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
91; GISEL-NEXT:    v_mul_hi_u32 v0, s0, v0
92; GISEL-NEXT:    v_mul_lo_u32 v0, v0, s1
93; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
94; GISEL-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
95; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
96; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
97; GISEL-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
98; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
99; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
100; GISEL-NEXT:    v_xor_b32_e32 v0, s2, v0
101; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, s2, v0
102; GISEL-NEXT:    v_readfirstlane_b32 s0, v0
103; GISEL-NEXT:    ; return to shader part epilog
104;
105; CGP-LABEL: s_srem_i32:
106; CGP:       ; %bb.0:
107; CGP-NEXT:    s_ashr_i32 s2, s0, 31
108; CGP-NEXT:    s_ashr_i32 s3, s1, 31
109; CGP-NEXT:    s_add_i32 s0, s0, s2
110; CGP-NEXT:    s_add_i32 s1, s1, s3
111; CGP-NEXT:    s_xor_b32 s0, s0, s2
112; CGP-NEXT:    s_xor_b32 s1, s1, s3
113; CGP-NEXT:    v_cvt_f32_u32_e32 v0, s1
114; CGP-NEXT:    s_sub_i32 s3, 0, s1
115; CGP-NEXT:    v_rcp_f32_e32 v0, v0
116; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
117; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
118; CGP-NEXT:    v_mul_lo_u32 v1, s3, v0
119; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
120; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
121; CGP-NEXT:    v_mul_hi_u32 v0, s0, v0
122; CGP-NEXT:    v_mul_lo_u32 v0, v0, s1
123; CGP-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
124; CGP-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
125; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
126; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
127; CGP-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
128; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
129; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
130; CGP-NEXT:    v_xor_b32_e32 v0, s2, v0
131; CGP-NEXT:    v_subrev_i32_e32 v0, vcc, s2, v0
132; CGP-NEXT:    v_readfirstlane_b32 s0, v0
133; CGP-NEXT:    ; return to shader part epilog
134  %result = srem i32 %num, %den
135  %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %result)
136  ret i32 %readlane
137}
138
; <2 x i32> srem with both operands as function arguments. The expected code
; expands each element separately (numerators in v0/v1, denominators in v2/v3)
; with the two expansions interleaved. GISEL and CGP differ only in the
; reciprocal instruction (v_rcp_iflag_f32 vs. v_rcp_f32).
139define <2 x i32> @v_srem_v2i32(<2 x i32> %num, <2 x i32> %den) {
140; GISEL-LABEL: v_srem_v2i32:
141; GISEL:       ; %bb.0:
142; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
144; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
145; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
146; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
147; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
148; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
149; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
150; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
151; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
152; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v5
153; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
154; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
155; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v2
156; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
157; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v3
158; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
159; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
160; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
161; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
162; GISEL-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
163; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
164; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
165; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v5
166; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v8
167; GISEL-NEXT:    v_mul_hi_u32 v7, v5, v7
168; GISEL-NEXT:    v_mul_hi_u32 v9, v8, v9
169; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
170; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
171; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
172; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
173; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v2
174; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v3
175; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
176; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
177; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
178; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
179; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
180; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
181; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
182; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
183; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
184; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
185; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
186; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
187; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
188; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
189; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
190; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
191; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
192; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
193; GISEL-NEXT:    s_setpc_b64 s[30:31]
194;
195; CGP-LABEL: v_srem_v2i32:
196; CGP:       ; %bb.0:
197; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
198; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
199; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
200; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
201; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
202; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
203; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
204; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
205; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
206; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
207; CGP-NEXT:    v_xor_b32_e32 v2, v2, v5
208; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
209; CGP-NEXT:    v_xor_b32_e32 v3, v3, v7
210; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v2
211; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
212; CGP-NEXT:    v_cvt_f32_u32_e32 v8, v3
213; CGP-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
214; CGP-NEXT:    v_rcp_f32_e32 v5, v5
215; CGP-NEXT:    v_rcp_f32_e32 v8, v8
216; CGP-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
217; CGP-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
218; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
219; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v8
220; CGP-NEXT:    v_mul_lo_u32 v7, v7, v5
221; CGP-NEXT:    v_mul_lo_u32 v9, v9, v8
222; CGP-NEXT:    v_mul_hi_u32 v7, v5, v7
223; CGP-NEXT:    v_mul_hi_u32 v9, v8, v9
224; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
225; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
226; CGP-NEXT:    v_mul_hi_u32 v5, v0, v5
227; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
228; CGP-NEXT:    v_mul_lo_u32 v5, v5, v2
229; CGP-NEXT:    v_mul_lo_u32 v7, v7, v3
230; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
231; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
232; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
233; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
234; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
235; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
236; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
237; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
238; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
239; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
240; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
241; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
242; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
243; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
244; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
245; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
246; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
247; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
248; CGP-NEXT:    s_setpc_b64 s[30:31]
249  %result = srem <2 x i32> %num, %den
250  ret <2 x i32> %result
251}
252
; Scalar i32 srem by the constant 4096 (0x1000). Both RUN lines share the
; CHECK prefix for this function. Where the variable-denominator tests expect
; a v_mul_lo_u32 by the denominator after the v_mul_hi_u32 on the numerator,
; the expected code here has v_lshlrev_b32 by 12 instead.
253define i32 @v_srem_i32_pow2k_denom(i32 %num) {
254; CHECK-LABEL: v_srem_i32_pow2k_denom:
255; CHECK:       ; %bb.0:
256; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
257; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
258; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, 0x45800000
259; CHECK-NEXT:    v_mov_b32_e32 v3, 0xfffff000
260; CHECK-NEXT:    v_mov_b32_e32 v4, 0x1000
261; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
262; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
263; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
264; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
265; CHECK-NEXT:    v_mul_lo_u32 v3, v2, v3
266; CHECK-NEXT:    v_mul_hi_u32 v3, v2, v3
267; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
268; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v2
269; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 12, v2
270; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
271; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 0xfffff000, v0
272; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
273; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
274; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 0xfffff000, v0
275; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
276; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
277; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
278; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
279; CHECK-NEXT:    s_setpc_b64 s[30:31]
280  %result = srem i32 %num, 4096
281  ret i32 %result
282}
283
; <2 x i32> srem by a splat of the constant 4096 (0x1000). The two prefixes
; differ in how the reciprocal input is formed: GISEL expects v_cvt_f32_u32 of
; 0x1000 followed by v_rcp_iflag_f32 on the register, while CGP expects
; v_rcp_iflag_f32 applied directly to the literal 0x45800000. In both, one
; reciprocal is shared by the two elements.
284define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) {
285; GISEL-LABEL: v_srem_v2i32_pow2k_denom:
286; GISEL:       ; %bb.0:
287; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
288; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
289; GISEL-NEXT:    v_mov_b32_e32 v3, 0x1000
290; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, 0x1000
291; GISEL-NEXT:    v_mov_b32_e32 v5, 0xfffff000
292; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
293; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
294; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
295; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
296; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
297; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
298; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
299; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
300; GISEL-NEXT:    v_mul_lo_u32 v7, v4, v5
301; GISEL-NEXT:    v_mul_hi_u32 v7, v4, v7
302; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
303; GISEL-NEXT:    v_mul_hi_u32 v7, v0, v4
304; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
305; GISEL-NEXT:    v_lshlrev_b32_e32 v7, 12, v7
306; GISEL-NEXT:    v_lshlrev_b32_e32 v4, 12, v4
307; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
308; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
309; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v0, v5
310; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 0xfffff000, v1
311; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
312; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
313; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
314; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
315; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v0, v5
316; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 0xfffff000, v1
317; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
318; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
319; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
320; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
321; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
322; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
323; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
324; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
325; GISEL-NEXT:    s_setpc_b64 s[30:31]
326;
327; CGP-LABEL: v_srem_v2i32_pow2k_denom:
328; CGP:       ; %bb.0:
329; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
330; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
331; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, 0x45800000
332; CGP-NEXT:    v_mov_b32_e32 v4, 0xfffff000
333; CGP-NEXT:    v_mov_b32_e32 v5, 0x1000
334; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
335; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
336; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
337; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
338; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
339; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
340; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
341; CGP-NEXT:    v_mul_lo_u32 v7, v3, v4
342; CGP-NEXT:    v_mul_hi_u32 v7, v3, v7
343; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
344; CGP-NEXT:    v_mul_hi_u32 v7, v0, v3
345; CGP-NEXT:    v_mul_hi_u32 v3, v1, v3
346; CGP-NEXT:    v_lshlrev_b32_e32 v7, 12, v7
347; CGP-NEXT:    v_lshlrev_b32_e32 v3, 12, v3
348; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
349; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
350; CGP-NEXT:    v_add_i32_e32 v3, vcc, v0, v4
351; CGP-NEXT:    v_add_i32_e32 v7, vcc, 0xfffff000, v1
352; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
353; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
354; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
355; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
356; CGP-NEXT:    v_add_i32_e32 v3, vcc, v0, v4
357; CGP-NEXT:    v_add_i32_e32 v4, vcc, 0xfffff000, v1
358; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
359; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
360; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
361; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
362; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
363; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
364; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
365; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
366; CGP-NEXT:    s_setpc_b64 s[30:31]
367  %result = srem <2 x i32> %num, <i32 4096, i32 4096>
368  ret <2 x i32> %result
369}
370
; Scalar i32 srem by the odd constant 1235195 (0x12d8fb). Both RUN lines share
; the CHECK prefix for this function. The expected code materializes the
; constant and its 32-bit negation (0xffed2705) and keeps the v_mul_lo_u32 by
; the denominator.
371define i32 @v_srem_i32_oddk_denom(i32 %num) {
372; CHECK-LABEL: v_srem_i32_oddk_denom:
373; CHECK:       ; %bb.0:
374; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
375; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
376; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, 0x4996c7d8
377; CHECK-NEXT:    v_mov_b32_e32 v3, 0xffed2705
378; CHECK-NEXT:    v_mov_b32_e32 v4, 0x12d8fb
379; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
380; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
381; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
382; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
383; CHECK-NEXT:    v_mul_lo_u32 v3, v2, v3
384; CHECK-NEXT:    v_mul_hi_u32 v3, v2, v3
385; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
386; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v2
387; CHECK-NEXT:    v_mul_lo_u32 v2, v2, v4
388; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
389; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 0xffed2705, v0
390; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
391; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
392; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 0xffed2705, v0
393; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
394; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
395; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
396; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
397; CHECK-NEXT:    s_setpc_b64 s[30:31]
398  %result = srem i32 %num, 1235195
399  ret i32 %result
400}
401
; <2 x i32> srem by a splat of the odd constant 1235195 (0x12d8fb). GISEL
; expects v_cvt_f32_u32 of 0x12d8fb followed by v_rcp_iflag_f32 on the
; register, while CGP expects v_rcp_iflag_f32 applied directly to the literal
; 0x4996c7d8. In both, one reciprocal is shared by the two elements.
402define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) {
403; GISEL-LABEL: v_srem_v2i32_oddk_denom:
404; GISEL:       ; %bb.0:
405; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
406; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
407; GISEL-NEXT:    v_mov_b32_e32 v3, 0x12d8fb
408; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, 0x12d8fb
409; GISEL-NEXT:    v_mov_b32_e32 v5, 0xffed2705
410; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
411; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
412; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
413; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
414; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
415; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
416; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
417; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
418; GISEL-NEXT:    v_mul_lo_u32 v7, v4, v5
419; GISEL-NEXT:    v_mul_hi_u32 v7, v4, v7
420; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
421; GISEL-NEXT:    v_mul_hi_u32 v7, v0, v4
422; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
423; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v3
424; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v3
425; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
426; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
427; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v0, v5
428; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 0xffed2705, v1
429; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
430; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
431; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
432; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
433; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v0, v5
434; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 0xffed2705, v1
435; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
436; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
437; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
438; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
439; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
440; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
441; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
442; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
443; GISEL-NEXT:    s_setpc_b64 s[30:31]
444;
445; CGP-LABEL: v_srem_v2i32_oddk_denom:
446; CGP:       ; %bb.0:
447; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
448; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
449; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, 0x4996c7d8
450; CGP-NEXT:    v_mov_b32_e32 v4, 0xffed2705
451; CGP-NEXT:    v_mov_b32_e32 v5, 0x12d8fb
452; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
453; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
454; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
455; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
456; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
457; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
458; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
459; CGP-NEXT:    v_mul_lo_u32 v7, v3, v4
460; CGP-NEXT:    v_mul_hi_u32 v7, v3, v7
461; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
462; CGP-NEXT:    v_mul_hi_u32 v7, v0, v3
463; CGP-NEXT:    v_mul_hi_u32 v3, v1, v3
464; CGP-NEXT:    v_mul_lo_u32 v7, v7, v5
465; CGP-NEXT:    v_mul_lo_u32 v3, v3, v5
466; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v7
467; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
468; CGP-NEXT:    v_add_i32_e32 v3, vcc, v0, v4
469; CGP-NEXT:    v_add_i32_e32 v7, vcc, 0xffed2705, v1
470; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
471; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
472; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
473; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
474; CGP-NEXT:    v_add_i32_e32 v3, vcc, v0, v4
475; CGP-NEXT:    v_add_i32_e32 v4, vcc, 0xffed2705, v1
476; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
477; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
478; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
479; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
480; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
481; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
482; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
483; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
484; CGP-NEXT:    s_setpc_b64 s[30:31]
485  %result = srem <2 x i32> %num, <i32 1235195, i32 1235195>
486  ret <2 x i32> %result
487}
488
; Scalar i32 srem whose denominator is 4096 shifted left by the variable %y.
; Both RUN lines share the CHECK prefix for this function. After the initial
; v_lshl_b32, the expected code is the same general sequence used for a
; variable denominator, including the v_mul_lo_u32 by the denominator register.
489define i32 @v_srem_i32_pow2_shl_denom(i32 %x, i32 %y) {
490; CHECK-LABEL: v_srem_i32_pow2_shl_denom:
491; CHECK:       ; %bb.0:
492; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
493; CHECK-NEXT:    v_lshl_b32_e32 v1, 0x1000, v1
494; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
495; CHECK-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
496; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
497; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
498; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
499; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v3
500; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v1
501; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
502; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
503; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
504; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
505; CHECK-NEXT:    v_mul_lo_u32 v4, v4, v3
506; CHECK-NEXT:    v_mul_hi_u32 v4, v3, v4
507; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
508; CHECK-NEXT:    v_mul_hi_u32 v3, v0, v3
509; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v1
510; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
511; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
512; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
513; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
514; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
515; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
516; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
517; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
518; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
519; CHECK-NEXT:    s_setpc_b64 s[30:31]
520  %shl.y = shl i32 4096, %y
521  %r = srem i32 %x, %shl.y
522  ret i32 %r
523}
524
; <2 x i32> srem whose denominator is a splat of 4096 shifted left
; element-wise by %y. After the two v_lshl_b32 instructions the expected code
; is the per-element general sequence. GISEL and CGP differ only in the
; reciprocal instruction (v_rcp_iflag_f32 vs. v_rcp_f32).
525define <2 x i32> @v_srem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
526; GISEL-LABEL: v_srem_v2i32_pow2_shl_denom:
527; GISEL:       ; %bb.0:
528; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
529; GISEL-NEXT:    v_lshl_b32_e32 v2, 0x1000, v2
530; GISEL-NEXT:    v_lshl_b32_e32 v3, 0x1000, v3
531; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
532; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
533; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
534; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
535; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
536; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
537; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
538; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
539; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
540; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
541; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v6
542; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
543; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v2
544; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
545; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v3
546; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
547; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
548; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
549; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
550; GISEL-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
551; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
552; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
553; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
554; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v8
555; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
556; GISEL-NEXT:    v_mul_hi_u32 v9, v8, v9
557; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
558; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
559; GISEL-NEXT:    v_mul_hi_u32 v6, v0, v6
560; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
561; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v2
562; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v3
563; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
564; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
565; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
566; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
567; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
568; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
569; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
570; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
571; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
572; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
573; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
574; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
575; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
576; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
577; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
578; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
579; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
580; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
581; GISEL-NEXT:    s_setpc_b64 s[30:31]
582;
583; CGP-LABEL: v_srem_v2i32_pow2_shl_denom:
584; CGP:       ; %bb.0:
585; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
586; CGP-NEXT:    v_lshl_b32_e32 v2, 0x1000, v2
587; CGP-NEXT:    v_lshl_b32_e32 v3, 0x1000, v3
588; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
589; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
590; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
591; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
592; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
593; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
594; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
595; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
596; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
597; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
598; CGP-NEXT:    v_xor_b32_e32 v2, v2, v6
599; CGP-NEXT:    v_xor_b32_e32 v3, v3, v7
600; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v2
601; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
602; CGP-NEXT:    v_cvt_f32_u32_e32 v8, v3
603; CGP-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
604; CGP-NEXT:    v_rcp_f32_e32 v6, v6
605; CGP-NEXT:    v_rcp_f32_e32 v8, v8
606; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
607; CGP-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
608; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
609; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v8
610; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
611; CGP-NEXT:    v_mul_lo_u32 v9, v9, v8
612; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
613; CGP-NEXT:    v_mul_hi_u32 v9, v8, v9
614; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
615; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
616; CGP-NEXT:    v_mul_hi_u32 v6, v0, v6
617; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
618; CGP-NEXT:    v_mul_lo_u32 v6, v6, v2
619; CGP-NEXT:    v_mul_lo_u32 v7, v7, v3
620; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
621; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
622; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
623; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
624; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
625; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
626; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
627; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
628; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
629; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
630; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
631; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
632; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
633; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
634; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
635; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
636; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
637; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
638; CGP-NEXT:    s_setpc_b64 s[30:31]
639  %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
640  %r = srem <2 x i32> %x, %shl.y
641  ret <2 x i32> %r
642}
643
; Scalar i32 srem where both operands are first ANDed with 16777215
; (0xffffff). Unlike @v_srem_i32, the expected output has no v_ashrrev/v_xor
; sign-handling instructions around the expansion. GISEL and CGP differ only
; in the reciprocal instruction (v_rcp_iflag_f32 vs. v_rcp_f32).
644define i32 @v_srem_i32_24bit(i32 %num, i32 %den) {
645; GISEL-LABEL: v_srem_i32_24bit:
646; GISEL:       ; %bb.0:
647; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
648; GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
649; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
650; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, v1
651; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
652; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
653; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
654; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
655; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v2
656; GISEL-NEXT:    v_mul_hi_u32 v3, v2, v3
657; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
658; GISEL-NEXT:    v_mul_hi_u32 v2, v0, v2
659; GISEL-NEXT:    v_mul_lo_u32 v2, v2, v1
660; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
661; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
662; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
663; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
664; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
665; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
666; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
667; GISEL-NEXT:    s_setpc_b64 s[30:31]
668;
669; CGP-LABEL: v_srem_i32_24bit:
670; CGP:       ; %bb.0:
671; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
672; CGP-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
673; CGP-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
674; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v1
675; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
676; CGP-NEXT:    v_rcp_f32_e32 v2, v2
677; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
678; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
679; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
680; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
681; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
682; CGP-NEXT:    v_mul_hi_u32 v2, v0, v2
683; CGP-NEXT:    v_mul_lo_u32 v2, v2, v1
684; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
685; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
686; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
687; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
688; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
689; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
690; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
691; CGP-NEXT:    s_setpc_b64 s[30:31]
692  %num.mask = and i32 %num, 16777215
693  %den.mask = and i32 %den, 16777215
694  %result = srem i32 %num.mask, %den.mask
695  ret i32 %result
696}
697
; <2 x i32> srem where both operands are first ANDed element-wise with
; 16777215 (0xffffff). As in the scalar 24-bit test, the expected output has
; no v_ashrrev/v_xor sign-handling instructions. GISEL and CGP differ only in
; the reciprocal instruction (v_rcp_iflag_f32 vs. v_rcp_f32).
698define <2 x i32> @v_srem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
699; GISEL-LABEL: v_srem_v2i32_24bit:
700; GISEL:       ; %bb.0:
701; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
702; GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
703; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
704; GISEL-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
705; GISEL-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
706; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
707; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
708; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v3
709; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
710; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
711; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
712; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
713; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
714; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
715; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
716; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
717; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
718; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
719; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
720; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
721; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
722; GISEL-NEXT:    v_mul_hi_u32 v4, v0, v4
723; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
724; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v2
725; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v3
726; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
727; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
728; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
729; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
730; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
731; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
732; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
733; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
734; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
735; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
736; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
737; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
738; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
739; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
740; GISEL-NEXT:    s_setpc_b64 s[30:31]
741;
742; CGP-LABEL: v_srem_v2i32_24bit:
743; CGP:       ; %bb.0:
744; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
745; CGP-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
746; CGP-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
747; CGP-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
748; CGP-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
749; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
750; CGP-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
751; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
752; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
753; CGP-NEXT:    v_rcp_f32_e32 v4, v4
754; CGP-NEXT:    v_rcp_f32_e32 v6, v6
755; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
756; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
757; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
758; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
759; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
760; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
761; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
762; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
763; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
764; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
765; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
766; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
767; CGP-NEXT:    v_mul_lo_u32 v4, v4, v2
768; CGP-NEXT:    v_mul_lo_u32 v5, v5, v3
769; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
770; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
771; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
772; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
773; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
774; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
775; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
776; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
777; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
778; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
779; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
780; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
781; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
782; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
783; CGP-NEXT:    s_setpc_b64 s[30:31]
784  %num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215>
785  %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215>
786  %result = srem <2 x i32> %num.mask, %den.mask
787  ret <2 x i32> %result
788}
789