xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll (revision 6206f5444fc0732e6495703c75a67f1f90f5b418)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s
4
5; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
6
; Signed 64-bit remainder with both operands arriving in VGPRs (num in v[0:1],
; den in v[2:3] per the expected output below). The assertions use the prefix
; shared by both RUN lines, so the same code is expected with the IR-level idiv
; expansion disabled and enabled.
; Shape of the expected output: the high halves of num and den are OR'd and
; compared against zero. If non-zero, .LBB0_3 runs the full 64-bit expansion;
; otherwise .LBB0_4 runs a 32-bit unsigned remainder on the low halves and
; writes 0 to the high result register.
7define i64 @v_srem_i64(i64 %num, i64 %den) {
8; CHECK-LABEL: v_srem_i64:
9; CHECK:       ; %bb.0:
10; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; CHECK-NEXT:    v_mov_b32_e32 v5, v1
12; CHECK-NEXT:    v_mov_b32_e32 v4, v0
13; CHECK-NEXT:    v_or_b32_e32 v1, v5, v3
14; CHECK-NEXT:    v_mov_b32_e32 v0, 0
15; CHECK-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
16; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
17; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
18; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
19; CHECK-NEXT:    s_cbranch_execnz .LBB0_3
20; CHECK-NEXT:  ; %bb.1: ; %Flow
21; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
22; CHECK-NEXT:    s_cbranch_execnz .LBB0_4
23; CHECK-NEXT:  .LBB0_2:
24; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
25; CHECK-NEXT:    s_setpc_b64 s[30:31]
26; CHECK-NEXT:  .LBB0_3:
27; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v3
28; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v2, v1
29; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v3, v1, vcc
30; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
31; CHECK-NEXT:    v_xor_b32_e32 v1, v2, v1
32; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, v0
33; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v1
34; CHECK-NEXT:    v_sub_i32_e32 v9, vcc, 0, v0
35; CHECK-NEXT:    v_subb_u32_e32 v10, vcc, 0, v1, vcc
36; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
37; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
38; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
39; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
40; CHECK-NEXT:    v_trunc_f32_e32 v6, v3
41; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v6
42; CHECK-NEXT:    v_cvt_u32_f32_e32 v8, v2
43; CHECK-NEXT:    v_cvt_u32_f32_e32 v11, v6
44; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v9, v8, 0
45; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[3:4]
46; CHECK-NEXT:    v_mul_lo_u32 v3, v11, v2
47; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7]
48; CHECK-NEXT:    v_mul_hi_u32 v7, v8, v2
49; CHECK-NEXT:    v_mul_hi_u32 v2, v11, v2
50; CHECK-NEXT:    v_mul_lo_u32 v12, v8, v6
51; CHECK-NEXT:    v_mul_lo_u32 v13, v11, v6
52; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v12
53; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
54; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
55; CHECK-NEXT:    v_mul_hi_u32 v7, v8, v6
56; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
57; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v12, v3
58; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v13, v2
59; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
60; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
61; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
62; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v12, v7
63; CHECK-NEXT:    v_mul_hi_u32 v6, v11, v6
64; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
65; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
66; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
67; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
68; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v2
69; CHECK-NEXT:    v_addc_u32_e32 v11, vcc, v11, v3, vcc
70; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v9, v8, 0
71; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[3:4]
72; CHECK-NEXT:    v_ashrrev_i32_e32 v9, 31, v5
73; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v4, v9
74; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7]
75; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v5, v9, vcc
76; CHECK-NEXT:    v_xor_b32_e32 v5, v3, v9
77; CHECK-NEXT:    v_mul_lo_u32 v3, v11, v2
78; CHECK-NEXT:    v_mul_lo_u32 v7, v8, v6
79; CHECK-NEXT:    v_xor_b32_e32 v10, v4, v9
80; CHECK-NEXT:    v_mul_hi_u32 v4, v8, v2
81; CHECK-NEXT:    v_mul_hi_u32 v2, v11, v2
82; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
83; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
84; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
85; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
86; CHECK-NEXT:    v_mul_lo_u32 v4, v11, v6
87; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
88; CHECK-NEXT:    v_mul_hi_u32 v7, v8, v6
89; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
90; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
91; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
92; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
93; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
94; CHECK-NEXT:    v_mul_hi_u32 v6, v11, v6
95; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
96; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
97; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
98; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
99; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v8, v2
100; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v11, v3, vcc
101; CHECK-NEXT:    v_mul_lo_u32 v4, v10, v2
102; CHECK-NEXT:    v_mul_lo_u32 v6, v5, v3
103; CHECK-NEXT:    v_mul_hi_u32 v7, v5, v2
104; CHECK-NEXT:    v_mul_hi_u32 v2, v10, v2
105; CHECK-NEXT:    v_mul_hi_u32 v8, v10, v3
106; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
107; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
108; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
109; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
110; CHECK-NEXT:    v_mul_lo_u32 v7, v10, v3
111; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
112; CHECK-NEXT:    v_mul_hi_u32 v6, v5, v3
113; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
114; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
115; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
116; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
117; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
118; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v2, v4
119; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v0, v7, 0
120; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
121; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
122; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
123; CHECK-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v0, v4, v[3:4]
124; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v5, v2
125; CHECK-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v1, v7, v[3:4]
126; CHECK-NEXT:    v_subb_u32_e64 v4, s[4:5], v10, v3, vcc
127; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v10, v3
128; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v1
129; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
130; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v0
131; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
132; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], v4, v1
133; CHECK-NEXT:    v_subb_u32_e32 v3, vcc, v3, v1, vcc
134; CHECK-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[4:5]
135; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, v2, v0
136; CHECK-NEXT:    v_subbrev_u32_e64 v7, s[4:5], 0, v3, vcc
137; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v1
138; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
139; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v0
140; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
141; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], v7, v1
142; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v3, v1, vcc
143; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v6, v0
144; CHECK-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s[4:5]
145; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
146; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
147; CHECK-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
148; CHECK-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
149; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
150; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
151; CHECK-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
152; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v9
153; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v9
154; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v9
155; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v9, vcc
156; CHECK-NEXT:    ; implicit-def: $vgpr2
157; CHECK-NEXT:    ; implicit-def: $vgpr4
158; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
159; CHECK-NEXT:    s_cbranch_execz .LBB0_2
160; CHECK-NEXT:  .LBB0_4:
161; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, v2
162; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, 0, v2
163; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
164; CHECK-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
165; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
166; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v0
167; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
168; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
169; CHECK-NEXT:    v_mul_hi_u32 v0, v4, v0
170; CHECK-NEXT:    v_mul_lo_u32 v0, v0, v2
171; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v4, v0
172; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v0, v2
173; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
174; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
175; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v0, v2
176; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
177; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
178; CHECK-NEXT:    v_mov_b32_e32 v1, 0
179; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
180; CHECK-NEXT:    s_setpc_b64 s[30:31]
; The whole function body is a single srem; everything asserted above is its
; expansion.
181  %result = srem i64 %num, %den
182  ret i64 %result
183}
184
185; FIXME: This is a workaround for the uniform VGPR case not being handled.
186declare i32 @llvm.amdgcn.readfirstlane(i32)
187
; Signed 64-bit remainder in an amdgpu_ps function whose operands are marked
; inreg (num in s[2:3], den in s[4:5] per the expected output below). The
; result halves are passed through llvm.amdgcn.readfirstlane before being
; returned.
; Shape of the expected output: the OR of both operands is masked to its high
; 32 bits and compared against zero. If non-zero, %bb.1 runs the 64-bit
; expansion; otherwise %bb.4 runs a 32-bit unsigned remainder on the low
; halves. s7 carries the flag that decides in %Flow whether %bb.4 is skipped.
; Only the low result (v0) is materialised and read back; s1 is a copy of s0
; (see the note on %ins.1 at the bottom of the function).
188define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
189; CHECK-LABEL: s_srem_i64:
190; CHECK:       ; %bb.0:
191; CHECK-NEXT:    s_mov_b32 s6, 0
192; CHECK-NEXT:    s_or_b64 s[0:1], s[2:3], s[4:5]
193; CHECK-NEXT:    s_mov_b32 s7, -1
194; CHECK-NEXT:    s_and_b64 s[0:1], s[0:1], s[6:7]
195; CHECK-NEXT:    v_cmp_ne_u64_e64 vcc, s[0:1], 0
196; CHECK-NEXT:    s_mov_b32 s7, 1
197; CHECK-NEXT:    s_cbranch_vccz .LBB1_2
198; CHECK-NEXT:  ; %bb.1:
199; CHECK-NEXT:    s_ashr_i32 s6, s3, 31
200; CHECK-NEXT:    s_ashr_i32 s0, s5, 31
201; CHECK-NEXT:    s_add_u32 s10, s2, s6
202; CHECK-NEXT:    s_addc_u32 s11, s3, s6
203; CHECK-NEXT:    s_add_u32 s8, s4, s0
204; CHECK-NEXT:    s_mov_b32 s1, s0
205; CHECK-NEXT:    s_addc_u32 s9, s5, s0
206; CHECK-NEXT:    s_xor_b64 s[8:9], s[8:9], s[0:1]
207; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, s8
208; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, s9
209; CHECK-NEXT:    s_mov_b32 s7, s6
210; CHECK-NEXT:    s_xor_b64 s[10:11], s[10:11], s[6:7]
211; CHECK-NEXT:    s_sub_u32 s3, 0, s8
212; CHECK-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
213; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
214; CHECK-NEXT:    s_subb_u32 s5, 0, s9
215; CHECK-NEXT:    s_mov_b32 s7, 0
216; CHECK-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
217; CHECK-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
218; CHECK-NEXT:    v_trunc_f32_e32 v2, v1
219; CHECK-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v2
220; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v0
221; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v2
222; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s3, v3, 0
223; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s3, v4, v[1:2]
224; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v0
225; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s5, v3, v[1:2]
226; CHECK-NEXT:    v_mul_lo_u32 v2, v4, v0
227; CHECK-NEXT:    v_mul_hi_u32 v0, v4, v0
228; CHECK-NEXT:    v_mul_lo_u32 v6, v3, v1
229; CHECK-NEXT:    v_mul_lo_u32 v7, v4, v1
230; CHECK-NEXT:    v_mul_hi_u32 v8, v3, v1
231; CHECK-NEXT:    v_mul_hi_u32 v1, v4, v1
232; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
233; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
234; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
235; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
236; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
237; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
238; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
239; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
240; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
241; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
242; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
243; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
244; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
245; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
246; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v0
247; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v4, v1, vcc
248; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s3, v3, 0
249; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s3, v4, v[1:2]
250; CHECK-NEXT:    v_mul_hi_u32 v6, v3, v0
251; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s5, v3, v[1:2]
252; CHECK-NEXT:    v_mul_lo_u32 v2, v4, v0
253; CHECK-NEXT:    v_mul_hi_u32 v0, v4, v0
254; CHECK-NEXT:    v_mul_lo_u32 v5, v3, v1
255; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
256; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
257; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
258; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
259; CHECK-NEXT:    v_mul_lo_u32 v6, v4, v1
260; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
261; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v1
262; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
263; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
264; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v5
265; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
266; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
267; CHECK-NEXT:    v_mul_hi_u32 v1, v4, v1
268; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
269; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
270; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
271; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
272; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
273; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v4, v1, vcc
274; CHECK-NEXT:    v_mul_lo_u32 v2, s11, v0
275; CHECK-NEXT:    v_mul_lo_u32 v3, s10, v1
276; CHECK-NEXT:    v_mul_hi_u32 v5, s10, v0
277; CHECK-NEXT:    v_mul_hi_u32 v0, s11, v0
278; CHECK-NEXT:    v_mul_hi_u32 v6, s11, v1
279; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
280; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
281; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
282; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
283; CHECK-NEXT:    v_mul_lo_u32 v5, s11, v1
284; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
285; CHECK-NEXT:    v_mul_hi_u32 v3, s10, v1
286; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
287; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
288; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
289; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
290; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
291; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v0, v2
292; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s8, v5, 0
293; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
294; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
295; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
296; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s8, v2, v[1:2]
297; CHECK-NEXT:    v_mov_b32_e32 v3, s11
298; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, s10, v0
299; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s9, v5, v[1:2]
300; CHECK-NEXT:    v_mov_b32_e32 v4, s9
301; CHECK-NEXT:    v_subb_u32_e64 v2, s[0:1], v3, v1, vcc
302; CHECK-NEXT:    v_sub_i32_e64 v1, s[0:1], s11, v1
303; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], s9, v2
304; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s[0:1]
305; CHECK-NEXT:    v_cmp_le_u32_e64 s[0:1], s8, v0
306; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[0:1]
307; CHECK-NEXT:    v_cmp_eq_u32_e64 s[0:1], s9, v2
308; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v4, vcc
309; CHECK-NEXT:    v_cndmask_b32_e64 v2, v3, v5, s[0:1]
310; CHECK-NEXT:    v_subrev_i32_e32 v3, vcc, s8, v0
311; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
312; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s9, v1
313; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
314; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s8, v3
315; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
316; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s9, v1
317; CHECK-NEXT:    v_cndmask_b32_e32 v1, v4, v5, vcc
318; CHECK-NEXT:    v_subrev_i32_e32 v4, vcc, s8, v3
319; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
320; CHECK-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
321; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
322; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
323; CHECK-NEXT:    v_xor_b32_e32 v0, s6, v0
324; CHECK-NEXT:    v_subrev_i32_e32 v0, vcc, s6, v0
325; CHECK-NEXT:    s_branch .LBB1_3
326; CHECK-NEXT:  .LBB1_2:
327; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
328; CHECK-NEXT:  .LBB1_3: ; %Flow
329; CHECK-NEXT:    s_xor_b32 s0, s7, 1
330; CHECK-NEXT:    s_and_b32 s0, s0, 1
331; CHECK-NEXT:    s_cmp_lg_u32 s0, 0
332; CHECK-NEXT:    s_cbranch_scc1 .LBB1_5
333; CHECK-NEXT:  ; %bb.4:
334; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, s4
335; CHECK-NEXT:    s_sub_i32 s0, 0, s4
336; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
337; CHECK-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
338; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
339; CHECK-NEXT:    v_mul_lo_u32 v1, s0, v0
340; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
341; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
342; CHECK-NEXT:    v_mul_hi_u32 v0, s2, v0
343; CHECK-NEXT:    v_mul_lo_u32 v0, v0, s4
344; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
345; CHECK-NEXT:    v_subrev_i32_e32 v1, vcc, s4, v0
346; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
347; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
348; CHECK-NEXT:    v_subrev_i32_e32 v1, vcc, s4, v0
349; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
350; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
351; CHECK-NEXT:  .LBB1_5:
352; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
353; CHECK-NEXT:    s_mov_b32 s1, s0
354; CHECK-NEXT:    ; return to shader part epilog
; Compute the remainder, split it into two i32 halves, read each half back
; with readfirstlane, then reassemble an i64 to return.
355  %result = srem i64 %num, %den
356  %cast = bitcast i64 %result to <2 x i32>
357  %elt.0 = extractelement <2 x i32> %cast, i32 0
358  %elt.1 = extractelement <2 x i32> %cast, i32 1
359  %res.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.0)
360  %res.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.1)
361  %ins.0 = insertelement <2 x i32> undef, i32 %res.0, i32 0
; NOTE(review): element 1 is filled with %res.0, not %res.1, so %res.1 is
; unused and the returned high half duplicates the low half. The assertions
; above match this (s1 is copied from s0), meaning the high 32 bits of the
; scalar remainder are not actually covered by this test. Looks like a typo
; for %res.1 - confirm; fixing it requires regenerating the assertions.
362  %ins.1 = insertelement <2 x i32> %ins.0, i32 %res.0, i32 1
363  %cast.back = bitcast <2 x i32> %ins.1 to i64
364  ret i64 %cast.back
365}
366
367define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
368; GISEL-LABEL: v_srem_v2i64:
369; GISEL:       ; %bb.0:
370; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
371; GISEL-NEXT:    v_ashrrev_i32_e32 v8, 31, v5
372; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
373; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v5, v8, vcc
374; GISEL-NEXT:    v_xor_b32_e32 v5, v4, v8
375; GISEL-NEXT:    v_xor_b32_e32 v8, v9, v8
376; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v5
377; GISEL-NEXT:    v_cvt_f32_u32_e32 v9, v8
378; GISEL-NEXT:    v_sub_i32_e32 v13, vcc, 0, v5
379; GISEL-NEXT:    v_subb_u32_e32 v14, vcc, 0, v8, vcc
380; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v9
381; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
382; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
383; GISEL-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v4
384; GISEL-NEXT:    v_trunc_f32_e32 v11, v9
385; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v11
386; GISEL-NEXT:    v_cvt_u32_f32_e32 v12, v4
387; GISEL-NEXT:    v_cvt_u32_f32_e32 v15, v11
388; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v13, v12, 0
389; GISEL-NEXT:    v_mov_b32_e32 v4, v10
390; GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v13, v15, v[4:5]
391; GISEL-NEXT:    v_mul_lo_u32 v4, v15, v9
392; GISEL-NEXT:    v_mul_hi_u32 v16, v12, v9
393; GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v14, v12, v[10:11]
394; GISEL-NEXT:    v_mul_hi_u32 v9, v15, v9
395; GISEL-NEXT:    v_mul_lo_u32 v11, v12, v10
396; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
397; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
398; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v16
399; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
400; GISEL-NEXT:    v_mul_lo_u32 v16, v15, v10
401; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
402; GISEL-NEXT:    v_mul_hi_u32 v11, v12, v10
403; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v16, v9
404; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
405; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
406; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
407; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
408; GISEL-NEXT:    v_mul_hi_u32 v10, v15, v10
409; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
410; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
411; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
412; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
413; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v4
414; GISEL-NEXT:    v_addc_u32_e32 v15, vcc, v15, v9, vcc
415; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v13, v12, 0
416; GISEL-NEXT:    v_mov_b32_e32 v4, v10
417; GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v13, v15, v[4:5]
418; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
419; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
420; GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v14, v12, v[10:11]
421; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
422; GISEL-NEXT:    v_xor_b32_e32 v13, v0, v4
423; GISEL-NEXT:    v_mul_lo_u32 v0, v15, v9
424; GISEL-NEXT:    v_mul_lo_u32 v11, v12, v10
425; GISEL-NEXT:    v_xor_b32_e32 v14, v1, v4
426; GISEL-NEXT:    v_mul_hi_u32 v1, v12, v9
427; GISEL-NEXT:    v_mul_hi_u32 v9, v15, v9
428; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v11
429; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
430; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
431; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
432; GISEL-NEXT:    v_mul_lo_u32 v1, v15, v10
433; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
434; GISEL-NEXT:    v_mul_hi_u32 v11, v12, v10
435; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v9
436; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
437; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v11
438; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
439; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
440; GISEL-NEXT:    v_mul_hi_u32 v10, v15, v10
441; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
442; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
443; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v9, v1
444; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
445; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v12, v0
446; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v15, v1, vcc
447; GISEL-NEXT:    v_mul_lo_u32 v9, v14, v0
448; GISEL-NEXT:    v_mul_lo_u32 v10, v13, v1
449; GISEL-NEXT:    v_mul_hi_u32 v11, v13, v0
450; GISEL-NEXT:    v_mul_hi_u32 v0, v14, v0
451; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
452; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
453; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
454; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
455; GISEL-NEXT:    v_mul_lo_u32 v11, v14, v1
456; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
457; GISEL-NEXT:    v_mul_hi_u32 v10, v13, v1
458; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
459; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
460; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
461; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
462; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
463; GISEL-NEXT:    v_mul_hi_u32 v1, v14, v1
464; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v0, v9
465; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
466; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v10, v0
467; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v1, v0
468; GISEL-NEXT:    v_ashrrev_i32_e32 v10, 31, v7
469; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
470; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, v7, v10, vcc
471; GISEL-NEXT:    v_xor_b32_e32 v6, v6, v10
472; GISEL-NEXT:    v_xor_b32_e32 v7, v7, v10
473; GISEL-NEXT:    v_cvt_f32_u32_e32 v12, v6
474; GISEL-NEXT:    v_cvt_f32_u32_e32 v15, v7
475; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0
476; GISEL-NEXT:    v_sub_i32_e32 v16, vcc, 0, v6
477; GISEL-NEXT:    v_mac_f32_e32 v12, 0x4f800000, v15
478; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v5, v9, v[1:2]
479; GISEL-NEXT:    v_rcp_iflag_f32_e32 v1, v12
480; GISEL-NEXT:    v_subb_u32_e32 v17, vcc, 0, v7, vcc
481; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v8, v11, v[9:10]
482; GISEL-NEXT:    v_mul_f32_e32 v1, 0x5f7ffffc, v1
483; GISEL-NEXT:    v_mul_f32_e32 v10, 0x2f800000, v1
484; GISEL-NEXT:    v_trunc_f32_e32 v12, v10
485; GISEL-NEXT:    v_mac_f32_e32 v1, 0xcf800000, v12
486; GISEL-NEXT:    v_cvt_u32_f32_e32 v15, v1
487; GISEL-NEXT:    v_cvt_u32_f32_e32 v12, v12
488; GISEL-NEXT:    v_sub_i32_e32 v13, vcc, v13, v0
489; GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v16, v15, 0
490; GISEL-NEXT:    v_mov_b32_e32 v0, v11
491; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v16, v12, v[0:1]
492; GISEL-NEXT:    v_subb_u32_e64 v11, s[4:5], v14, v9, vcc
493; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v17, v15, v[0:1]
494; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], v14, v9
495; GISEL-NEXT:    v_mul_lo_u32 v9, v12, v10
496; GISEL-NEXT:    v_mul_lo_u32 v14, v15, v0
497; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v11, v8
498; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v8, vcc
499; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v14
500; GISEL-NEXT:    v_mul_hi_u32 v14, v15, v10
501; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
502; GISEL-NEXT:    v_mul_hi_u32 v10, v12, v10
503; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v14
504; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[6:7]
505; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v13, v5
506; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[6:7]
507; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], v11, v8
508; GISEL-NEXT:    v_cndmask_b32_e64 v9, v9, v14, s[6:7]
509; GISEL-NEXT:    v_sub_i32_e32 v14, vcc, v13, v5
510; GISEL-NEXT:    v_subbrev_u32_e64 v19, s[6:7], 0, v1, vcc
511; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v14, v5
512; GISEL-NEXT:    v_cmp_ge_u32_e64 s[8:9], v19, v8
513; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v8, vcc
514; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, -1, s[8:9]
515; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, -1, s[6:7]
516; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], v19, v8
517; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v14, v5
518; GISEL-NEXT:    v_cndmask_b32_e64 v20, v20, v21, s[6:7]
519; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
520; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v20
521; GISEL-NEXT:    v_cndmask_b32_e32 v5, v14, v5, vcc
522; GISEL-NEXT:    v_cndmask_b32_e32 v8, v19, v1, vcc
523; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
524; GISEL-NEXT:    v_mul_lo_u32 v14, v12, v0
525; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v18, v1
526; GISEL-NEXT:    v_mul_hi_u32 v18, v15, v0
527; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v14, v10
528; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
529; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v18
530; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
531; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v18
532; GISEL-NEXT:    v_mul_hi_u32 v0, v12, v0
533; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
534; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
535; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v14, v10
536; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
537; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v15, v1
538; GISEL-NEXT:    v_addc_u32_e32 v12, vcc, v12, v0, vcc
539; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v16, v10, 0
540; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
541; GISEL-NEXT:    v_cndmask_b32_e32 v11, v11, v8, vcc
542; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v16, v12, v[1:2]
543; GISEL-NEXT:    v_cndmask_b32_e32 v5, v13, v5, vcc
544; GISEL-NEXT:    v_xor_b32_e32 v1, v5, v4
545; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v17, v10, v[8:9]
546; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v3
547; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
548; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
549; GISEL-NEXT:    v_xor_b32_e32 v13, v2, v5
550; GISEL-NEXT:    v_mul_lo_u32 v2, v12, v0
551; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v8
552; GISEL-NEXT:    v_xor_b32_e32 v14, v3, v5
553; GISEL-NEXT:    v_mul_hi_u32 v3, v10, v0
554; GISEL-NEXT:    v_mul_hi_u32 v0, v12, v0
555; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
556; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
557; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
558; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
559; GISEL-NEXT:    v_mul_lo_u32 v3, v12, v8
560; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v9, v2
561; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v8
562; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
563; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
564; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
565; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
566; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v9
567; GISEL-NEXT:    v_mul_hi_u32 v8, v12, v8
568; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
569; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
570; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
571; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v8, v2
572; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v10, v0
573; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v12, v2, vcc
574; GISEL-NEXT:    v_mul_lo_u32 v3, v14, v0
575; GISEL-NEXT:    v_mul_lo_u32 v8, v13, v2
576; GISEL-NEXT:    v_mul_hi_u32 v9, v13, v0
577; GISEL-NEXT:    v_mul_hi_u32 v0, v14, v0
578; GISEL-NEXT:    v_xor_b32_e32 v10, v11, v4
579; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
580; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
581; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v9
582; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
583; GISEL-NEXT:    v_mul_lo_u32 v9, v14, v2
584; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v8, v3
585; GISEL-NEXT:    v_mul_hi_u32 v8, v13, v2
586; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
587; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
588; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
589; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
590; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
591; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v0, v3
592; GISEL-NEXT:    v_mul_hi_u32 v9, v14, v2
593; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, v11, 0
594; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
595; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
596; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v0
597; GISEL-NEXT:    v_mov_b32_e32 v0, v3
598; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v6, v8, v[0:1]
599; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v1, v4
600; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v10, v4, vcc
601; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v7, v11, v[8:9]
602; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v13, v2
603; GISEL-NEXT:    v_subb_u32_e64 v4, s[4:5], v14, v3, vcc
604; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v14, v3
605; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v7
606; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
607; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v6
608; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
609; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v4, v7
610; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
611; GISEL-NEXT:    v_cndmask_b32_e64 v8, v8, v9, s[4:5]
612; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, v2, v6
613; GISEL-NEXT:    v_subbrev_u32_e64 v10, s[4:5], 0, v3, vcc
614; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v7
615; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
616; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v6
617; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
618; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
619; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v10, v7
620; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v9, v6
621; GISEL-NEXT:    v_cndmask_b32_e64 v11, v11, v12, s[4:5]
622; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
623; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v11
624; GISEL-NEXT:    v_cndmask_b32_e32 v6, v9, v6, vcc
625; GISEL-NEXT:    v_cndmask_b32_e32 v3, v10, v3, vcc
626; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
627; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
628; GISEL-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
629; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v5
630; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v5
631; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v5
632; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v5, vcc
633; GISEL-NEXT:    s_setpc_b64 s[30:31]
634;
635; CGP-LABEL: v_srem_v2i64:
636; CGP:       ; %bb.0:
637; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
638; CGP-NEXT:    v_mov_b32_e32 v11, v1
639; CGP-NEXT:    v_mov_b32_e32 v10, v0
640; CGP-NEXT:    v_or_b32_e32 v1, v11, v5
641; CGP-NEXT:    v_mov_b32_e32 v0, 0
642; CGP-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
643; CGP-NEXT:    v_mov_b32_e32 v8, v2
644; CGP-NEXT:    v_mov_b32_e32 v9, v3
645; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
646; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
647; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
648; CGP-NEXT:    s_cbranch_execz .LBB2_2
649; CGP-NEXT:  ; %bb.1:
650; CGP-NEXT:    v_ashrrev_i32_e32 v1, 31, v5
651; CGP-NEXT:    v_add_i32_e32 v0, vcc, v4, v1
652; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v5, v1, vcc
653; CGP-NEXT:    v_xor_b32_e32 v0, v0, v1
654; CGP-NEXT:    v_xor_b32_e32 v1, v2, v1
655; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v0
656; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v1
657; CGP-NEXT:    v_sub_i32_e32 v12, vcc, 0, v0
658; CGP-NEXT:    v_subb_u32_e32 v13, vcc, 0, v1, vcc
659; CGP-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
660; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, v2
661; CGP-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
662; CGP-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
663; CGP-NEXT:    v_trunc_f32_e32 v4, v3
664; CGP-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v4
665; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v2
666; CGP-NEXT:    v_cvt_u32_f32_e32 v14, v4
667; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v12, v5, 0
668; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v12, v14, v[3:4]
669; CGP-NEXT:    v_mul_hi_u32 v15, v5, v2
670; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v13, v5, v[3:4]
671; CGP-NEXT:    v_mul_lo_u32 v4, v14, v2
672; CGP-NEXT:    v_mul_hi_u32 v2, v14, v2
673; CGP-NEXT:    v_mul_lo_u32 v16, v5, v3
674; CGP-NEXT:    v_mul_lo_u32 v17, v14, v3
675; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v16
676; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
677; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v15
678; CGP-NEXT:    v_mul_hi_u32 v15, v5, v3
679; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
680; CGP-NEXT:    v_add_i32_e32 v4, vcc, v16, v4
681; CGP-NEXT:    v_add_i32_e32 v2, vcc, v17, v2
682; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
683; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v15
684; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
685; CGP-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
686; CGP-NEXT:    v_mul_hi_u32 v3, v14, v3
687; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
688; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
689; CGP-NEXT:    v_add_i32_e32 v4, vcc, v15, v4
690; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
691; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v2
692; CGP-NEXT:    v_addc_u32_e32 v14, vcc, v14, v3, vcc
693; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v12, v5, 0
694; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v12, v14, v[3:4]
695; CGP-NEXT:    v_ashrrev_i32_e32 v12, 31, v11
696; CGP-NEXT:    v_mul_hi_u32 v15, v5, v2
697; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v13, v5, v[3:4]
698; CGP-NEXT:    v_add_i32_e32 v4, vcc, v10, v12
699; CGP-NEXT:    v_addc_u32_e32 v10, vcc, v11, v12, vcc
700; CGP-NEXT:    v_xor_b32_e32 v11, v4, v12
701; CGP-NEXT:    v_mul_lo_u32 v4, v14, v2
702; CGP-NEXT:    v_mul_lo_u32 v13, v5, v3
703; CGP-NEXT:    v_mul_hi_u32 v2, v14, v2
704; CGP-NEXT:    v_xor_b32_e32 v10, v10, v12
705; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v13
706; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
707; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v15
708; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
709; CGP-NEXT:    v_mul_lo_u32 v15, v14, v3
710; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
711; CGP-NEXT:    v_mul_hi_u32 v13, v5, v3
712; CGP-NEXT:    v_add_i32_e32 v2, vcc, v15, v2
713; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
714; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v13
715; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
716; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
717; CGP-NEXT:    v_mul_hi_u32 v3, v14, v3
718; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
719; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
720; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
721; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
722; CGP-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
723; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v14, v3, vcc
724; CGP-NEXT:    v_mul_lo_u32 v4, v10, v2
725; CGP-NEXT:    v_mul_lo_u32 v5, v11, v3
726; CGP-NEXT:    v_mul_hi_u32 v13, v11, v2
727; CGP-NEXT:    v_mul_hi_u32 v2, v10, v2
728; CGP-NEXT:    v_mul_hi_u32 v14, v10, v3
729; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
730; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
731; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v13
732; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
733; CGP-NEXT:    v_mul_lo_u32 v13, v10, v3
734; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
735; CGP-NEXT:    v_mul_hi_u32 v5, v11, v3
736; CGP-NEXT:    v_add_i32_e32 v2, vcc, v13, v2
737; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
738; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
739; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
740; CGP-NEXT:    v_add_i32_e32 v5, vcc, v13, v5
741; CGP-NEXT:    v_add_i32_e32 v13, vcc, v2, v4
742; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v0, v13, 0
743; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
744; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
745; CGP-NEXT:    v_add_i32_e32 v4, vcc, v14, v4
746; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v0, v4, v[3:4]
747; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v11, v2
748; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v1, v13, v[3:4]
749; CGP-NEXT:    v_subb_u32_e64 v4, s[4:5], v10, v3, vcc
750; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v10, v3
751; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v1
752; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
753; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v0
754; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
755; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v4, v1
756; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v1, vcc
757; CGP-NEXT:    v_cndmask_b32_e64 v5, v5, v10, s[4:5]
758; CGP-NEXT:    v_sub_i32_e32 v10, vcc, v2, v0
759; CGP-NEXT:    v_subbrev_u32_e64 v11, s[4:5], 0, v3, vcc
760; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v1
761; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
762; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v0
763; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[4:5]
764; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v11, v1
765; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v3, v1, vcc
766; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v10, v0
767; CGP-NEXT:    v_cndmask_b32_e64 v13, v13, v14, s[4:5]
768; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
769; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
770; CGP-NEXT:    v_cndmask_b32_e32 v0, v10, v0, vcc
771; CGP-NEXT:    v_cndmask_b32_e32 v1, v11, v1, vcc
772; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
773; CGP-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
774; CGP-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
775; CGP-NEXT:    v_xor_b32_e32 v0, v0, v12
776; CGP-NEXT:    v_xor_b32_e32 v1, v1, v12
777; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v12
778; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v1, v12, vcc
779; CGP-NEXT:    ; implicit-def: $vgpr4
780; CGP-NEXT:    ; implicit-def: $vgpr10
781; CGP-NEXT:  .LBB2_2: ; %Flow1
782; CGP-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
783; CGP-NEXT:    s_cbranch_execz .LBB2_4
784; CGP-NEXT:  ; %bb.3:
785; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v4
786; CGP-NEXT:    v_sub_i32_e32 v1, vcc, 0, v4
787; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
788; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
789; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
790; CGP-NEXT:    v_mul_lo_u32 v1, v1, v0
791; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
792; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
793; CGP-NEXT:    v_mul_hi_u32 v0, v10, v0
794; CGP-NEXT:    v_mul_lo_u32 v0, v0, v4
795; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v10, v0
796; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v0, v4
797; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
798; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
799; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v0, v4
800; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
801; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
802; CGP-NEXT:    v_mov_b32_e32 v1, 0
803; CGP-NEXT:  .LBB2_4:
804; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
805; CGP-NEXT:    v_or_b32_e32 v3, v9, v7
806; CGP-NEXT:    v_mov_b32_e32 v2, 0
807; CGP-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
808; CGP-NEXT:    ; implicit-def: $vgpr2_vgpr3
809; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
810; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
811; CGP-NEXT:    s_cbranch_execnz .LBB2_7
812; CGP-NEXT:  ; %bb.5: ; %Flow
813; CGP-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
814; CGP-NEXT:    s_cbranch_execnz .LBB2_8
815; CGP-NEXT:  .LBB2_6:
816; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
817; CGP-NEXT:    s_setpc_b64 s[30:31]
818; CGP-NEXT:  .LBB2_7:
819; CGP-NEXT:    v_ashrrev_i32_e32 v3, 31, v7
820; CGP-NEXT:    v_add_i32_e32 v2, vcc, v6, v3
821; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v7, v3, vcc
822; CGP-NEXT:    v_xor_b32_e32 v2, v2, v3
823; CGP-NEXT:    v_xor_b32_e32 v3, v4, v3
824; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
825; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v3
826; CGP-NEXT:    v_sub_i32_e32 v10, vcc, 0, v2
827; CGP-NEXT:    v_subb_u32_e32 v11, vcc, 0, v3, vcc
828; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
829; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
830; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
831; CGP-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
832; CGP-NEXT:    v_trunc_f32_e32 v6, v5
833; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
834; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v4
835; CGP-NEXT:    v_cvt_u32_f32_e32 v12, v6
836; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v10, v7, 0
837; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v10, v12, v[5:6]
838; CGP-NEXT:    v_mul_hi_u32 v13, v7, v4
839; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v11, v7, v[5:6]
840; CGP-NEXT:    v_mul_lo_u32 v6, v12, v4
841; CGP-NEXT:    v_mul_hi_u32 v4, v12, v4
842; CGP-NEXT:    v_mul_lo_u32 v14, v7, v5
843; CGP-NEXT:    v_mul_lo_u32 v15, v12, v5
844; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v14
845; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
846; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v13
847; CGP-NEXT:    v_mul_hi_u32 v13, v7, v5
848; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
849; CGP-NEXT:    v_add_i32_e32 v6, vcc, v14, v6
850; CGP-NEXT:    v_add_i32_e32 v4, vcc, v15, v4
851; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
852; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v13
853; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
854; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
855; CGP-NEXT:    v_mul_hi_u32 v5, v12, v5
856; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
857; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
858; CGP-NEXT:    v_add_i32_e32 v6, vcc, v13, v6
859; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
860; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v4
861; CGP-NEXT:    v_addc_u32_e32 v12, vcc, v12, v5, vcc
862; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v10, v7, 0
863; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v10, v12, v[5:6]
864; CGP-NEXT:    v_ashrrev_i32_e32 v10, 31, v9
865; CGP-NEXT:    v_mul_hi_u32 v13, v7, v4
866; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v11, v7, v[5:6]
867; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v10
868; CGP-NEXT:    v_addc_u32_e32 v8, vcc, v9, v10, vcc
869; CGP-NEXT:    v_xor_b32_e32 v9, v6, v10
870; CGP-NEXT:    v_mul_lo_u32 v6, v12, v4
871; CGP-NEXT:    v_mul_lo_u32 v11, v7, v5
872; CGP-NEXT:    v_mul_hi_u32 v4, v12, v4
873; CGP-NEXT:    v_xor_b32_e32 v8, v8, v10
874; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
875; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
876; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v13
877; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
878; CGP-NEXT:    v_mul_lo_u32 v13, v12, v5
879; CGP-NEXT:    v_add_i32_e32 v6, vcc, v11, v6
880; CGP-NEXT:    v_mul_hi_u32 v11, v7, v5
881; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
882; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
883; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
884; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
885; CGP-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
886; CGP-NEXT:    v_mul_hi_u32 v5, v12, v5
887; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
888; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
889; CGP-NEXT:    v_add_i32_e32 v6, vcc, v11, v6
890; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
891; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
892; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v12, v5, vcc
893; CGP-NEXT:    v_mul_lo_u32 v6, v8, v4
894; CGP-NEXT:    v_mul_lo_u32 v7, v9, v5
895; CGP-NEXT:    v_mul_hi_u32 v11, v9, v4
896; CGP-NEXT:    v_mul_hi_u32 v4, v8, v4
897; CGP-NEXT:    v_mul_hi_u32 v12, v8, v5
898; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
899; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
900; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
901; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
902; CGP-NEXT:    v_mul_lo_u32 v11, v8, v5
903; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
904; CGP-NEXT:    v_mul_hi_u32 v7, v9, v5
905; CGP-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
906; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
907; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
908; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
909; CGP-NEXT:    v_add_i32_e32 v7, vcc, v11, v7
910; CGP-NEXT:    v_add_i32_e32 v11, vcc, v4, v6
911; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v11, 0
912; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
913; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
914; CGP-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
915; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v2, v6, v[5:6]
916; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v9, v4
917; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v11, v[5:6]
918; CGP-NEXT:    v_subb_u32_e64 v6, s[4:5], v8, v5, vcc
919; CGP-NEXT:    v_sub_i32_e64 v5, s[4:5], v8, v5
920; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v3
921; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
922; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v2
923; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
924; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v6, v3
925; CGP-NEXT:    v_subb_u32_e32 v5, vcc, v5, v3, vcc
926; CGP-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s[4:5]
927; CGP-NEXT:    v_sub_i32_e32 v8, vcc, v4, v2
928; CGP-NEXT:    v_subbrev_u32_e64 v9, s[4:5], 0, v5, vcc
929; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v3
930; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
931; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v2
932; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
933; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v9, v3
934; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v5, v3, vcc
935; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v8, v2
936; CGP-NEXT:    v_cndmask_b32_e64 v11, v11, v12, s[4:5]
937; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
938; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v11
939; CGP-NEXT:    v_cndmask_b32_e32 v2, v8, v2, vcc
940; CGP-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
941; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
942; CGP-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
943; CGP-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
944; CGP-NEXT:    v_xor_b32_e32 v2, v2, v10
945; CGP-NEXT:    v_xor_b32_e32 v3, v3, v10
946; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
947; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v10, vcc
948; CGP-NEXT:    ; implicit-def: $vgpr6
949; CGP-NEXT:    ; implicit-def: $vgpr8
950; CGP-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
951; CGP-NEXT:    s_cbranch_execz .LBB2_6
952; CGP-NEXT:  .LBB2_8:
953; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v6
954; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v6
955; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, v2
956; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
957; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
958; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
959; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
960; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
961; CGP-NEXT:    v_mul_hi_u32 v2, v8, v2
962; CGP-NEXT:    v_mul_lo_u32 v2, v2, v6
963; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v8, v2
964; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v2, v6
965; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v6
966; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
967; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v2, v6
968; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v6
969; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
970; CGP-NEXT:    v_mov_b32_e32 v3, 0
971; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
972; CGP-NEXT:    s_setpc_b64 s[30:31]
973  %result = srem <2 x i64> %num, %den
974  ret <2 x i64> %result
975}
976
; Test: signed remainder of a scalar i64 by the constant 4096 (0x1000).
;
; The expected ISA is matched under the shared CHECK prefix, which both RUN
; lines of this file enable, so the same instruction sequence is expected with
; -amdgpu-codegenprepare-disable-idiv-expansion set to either 1 or 0.
;
; The checked sequence contains a v_rcp_iflag_f32 reciprocal followed by
; mul_lo/mul_hi/mad steps and compare/select corrections; no and-mask or
; shift by 12 appears in it.
; NOTE(review): 0xfffff000 is -4096 as a 32-bit value, presumably the low half
; of the negated divisor, with the literal -1 operand of v_mad_u64_u32 as its
; high half - confirm against the legalizer expansion.
; NOTE(review): v_ashrrev_i32 by 31 plus the add/xor pairs look like the usual
; sign-mask handling (take the magnitude, then restore the sign at the end) -
; confirm.
;
; The CHECK lines below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
977define i64 @v_srem_i64_pow2k_denom(i64 %num) {
978; CHECK-LABEL: v_srem_i64_pow2k_denom:
979; CHECK:       ; %bb.0:
980; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
981; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0x1000
982; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v3, 0
983; CHECK-NEXT:    v_mov_b32_e32 v6, 0xfffff000
984; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
985; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
986; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
987; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
988; CHECK-NEXT:    v_trunc_f32_e32 v4, v3
989; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v4
990; CHECK-NEXT:    v_cvt_u32_f32_e32 v5, v2
991; CHECK-NEXT:    v_cvt_u32_f32_e32 v7, v4
992; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, v5, 0
993; CHECK-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v6, v7, v[3:4]
994; CHECK-NEXT:    v_mul_hi_u32 v8, v5, v2
995; CHECK-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], -1, v5, v[3:4]
996; CHECK-NEXT:    v_mul_lo_u32 v4, v7, v2
997; CHECK-NEXT:    v_mul_hi_u32 v2, v7, v2
998; CHECK-NEXT:    v_mul_lo_u32 v9, v5, v3
999; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v3
1000; CHECK-NEXT:    v_mul_hi_u32 v11, v5, v3
1001; CHECK-NEXT:    v_mul_hi_u32 v3, v7, v3
1002; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
1003; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1004; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v10, v2
1005; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1006; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
1007; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1008; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
1009; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v11
1010; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1011; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
1012; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
1013; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1014; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
1015; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
1016; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v2
1017; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, v7, v3, vcc
1018; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, v5, 0
1019; CHECK-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v6, v7, v[3:4]
1020; CHECK-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
1021; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
1022; CHECK-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], -1, v5, v[3:4]
1023; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v1, v6, vcc
1024; CHECK-NEXT:    v_xor_b32_e32 v4, v0, v6
1025; CHECK-NEXT:    v_mul_lo_u32 v0, v7, v2
1026; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v3
1027; CHECK-NEXT:    v_xor_b32_e32 v9, v1, v6
1028; CHECK-NEXT:    v_mul_hi_u32 v1, v5, v2
1029; CHECK-NEXT:    v_mul_hi_u32 v2, v7, v2
1030; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
1031; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1032; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
1033; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
1034; CHECK-NEXT:    v_mul_lo_u32 v1, v7, v3
1035; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
1036; CHECK-NEXT:    v_mul_hi_u32 v8, v5, v3
1037; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
1038; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
1039; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
1040; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1041; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v8
1042; CHECK-NEXT:    v_mul_hi_u32 v3, v7, v3
1043; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
1044; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
1045; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
1046; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v3, v1
1047; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
1048; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v7, v1, vcc
1049; CHECK-NEXT:    v_mul_lo_u32 v2, v9, v0
1050; CHECK-NEXT:    v_mul_lo_u32 v3, v4, v1
1051; CHECK-NEXT:    v_mul_hi_u32 v7, v4, v0
1052; CHECK-NEXT:    v_mul_hi_u32 v0, v9, v0
1053; CHECK-NEXT:    v_mov_b32_e32 v5, 0x1000
1054; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
1055; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
1056; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
1057; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
1058; CHECK-NEXT:    v_mul_lo_u32 v7, v9, v1
1059; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
1060; CHECK-NEXT:    v_mul_hi_u32 v3, v4, v1
1061; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
1062; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
1063; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
1064; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
1065; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
1066; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
1067; CHECK-NEXT:    v_mul_hi_u32 v7, v9, v1
1068; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v0, 0
1069; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
1070; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
1071; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
1072; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v5, v2, v[1:2]
1073; CHECK-NEXT:    v_sub_i32_e64 v0, s[4:5], v4, v0
1074; CHECK-NEXT:    v_subb_u32_e64 v2, vcc, v9, v1, s[4:5]
1075; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v9, v1
1076; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
1077; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
1078; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
1079; CHECK-NEXT:    v_cndmask_b32_e32 v3, -1, v3, vcc
1080; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, v0, v5
1081; CHECK-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
1082; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1083; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v5
1084; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
1085; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
1086; CHECK-NEXT:    v_cndmask_b32_e32 v5, -1, v5, vcc
1087; CHECK-NEXT:    v_subrev_i32_e32 v7, vcc, 0x1000, v4
1088; CHECK-NEXT:    v_subbrev_u32_e32 v8, vcc, 0, v1, vcc
1089; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
1090; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
1091; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
1092; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
1093; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1094; CHECK-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1095; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v6
1096; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v6
1097; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
1098; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v6, vcc
1099; CHECK-NEXT:    s_setpc_b64 s[30:31]
  ; 4096 == 0x1000 == 2^12; the same constant appears as 0x1000 in the checks above.
1100  %result = srem i64 %num, 4096
1101  ret i64 %result
1102}
1103
1104define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) {
1105; GISEL-LABEL: v_srem_v2i64_pow2k_denom:
1106; GISEL:       ; %bb.0:
1107; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1108; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, 0x1000
1109; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
1110; GISEL-NEXT:    v_mov_b32_e32 v6, 0xfffff000
1111; GISEL-NEXT:    s_mov_b32 s6, 1
1112; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
1113; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
1114; GISEL-NEXT:    s_cmp_lg_u32 s6, 0
1115; GISEL-NEXT:    s_subb_u32 s6, 0, 0
1116; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
1117; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
1118; GISEL-NEXT:    v_trunc_f32_e32 v8, v5
1119; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v8
1120; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v4
1121; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
1122; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v7, 0
1123; GISEL-NEXT:    v_mov_b32_e32 v9, v5
1124; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v6, v8, v[9:10]
1125; GISEL-NEXT:    v_mul_hi_u32 v11, v7, v4
1126; GISEL-NEXT:    v_mul_hi_u32 v12, v8, v4
1127; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s6, v7, v[9:10]
1128; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v4
1129; GISEL-NEXT:    v_mul_lo_u32 v13, v7, v9
1130; GISEL-NEXT:    v_mul_lo_u32 v4, v8, v9
1131; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v10, v13
1132; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1133; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v11
1134; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1135; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
1136; GISEL-NEXT:    v_mul_hi_u32 v14, v7, v9
1137; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
1138; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1139; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
1140; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1141; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
1142; GISEL-NEXT:    v_mul_hi_u32 v9, v8, v9
1143; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v13
1144; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1145; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
1146; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v13
1147; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v7, v4
1148; GISEL-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0
1149; GISEL-NEXT:    v_addc_u32_e32 v17, vcc, v8, v9, vcc
1150; GISEL-NEXT:    v_mov_b32_e32 v4, v14
1151; GISEL-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5]
1152; GISEL-NEXT:    v_mul_lo_u32 v4, v17, v13
1153; GISEL-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], s6, v16, v[14:15]
1154; GISEL-NEXT:    s_mov_b32 s6, 1
1155; GISEL-NEXT:    s_cmp_lg_u32 s6, 0
1156; GISEL-NEXT:    v_mul_lo_u32 v9, v16, v14
1157; GISEL-NEXT:    s_subb_u32 s6, 0, 0
1158; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
1159; GISEL-NEXT:    v_mul_hi_u32 v9, v16, v13
1160; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1161; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
1162; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1163; GISEL-NEXT:    v_mul_hi_u32 v9, v17, v13
1164; GISEL-NEXT:    v_mul_lo_u32 v13, v17, v14
1165; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v15, v4
1166; GISEL-NEXT:    v_mul_hi_u32 v15, v16, v14
1167; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
1168; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1169; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v9, v15
1170; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1171; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v9
1172; GISEL-NEXT:    v_ashrrev_i32_e32 v9, 31, v1
1173; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
1174; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v9, vcc
1175; GISEL-NEXT:    v_xor_b32_e32 v18, v0, v9
1176; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v15, v4
1177; GISEL-NEXT:    v_mul_hi_u32 v4, v17, v14
1178; GISEL-NEXT:    v_xor_b32_e32 v19, v1, v9
1179; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
1180; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
1181; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v4, v1
1182; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v16, v0
1183; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v17, v1, vcc
1184; GISEL-NEXT:    v_mul_lo_u32 v13, v19, v0
1185; GISEL-NEXT:    v_mul_lo_u32 v14, v18, v1
1186; GISEL-NEXT:    v_mul_hi_u32 v15, v18, v0
1187; GISEL-NEXT:    v_mul_hi_u32 v0, v19, v0
1188; GISEL-NEXT:    v_mov_b32_e32 v4, 0x1000
1189; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
1190; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1191; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
1192; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1193; GISEL-NEXT:    v_mul_lo_u32 v15, v19, v1
1194; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
1195; GISEL-NEXT:    v_mul_hi_u32 v14, v18, v1
1196; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v15, v0
1197; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1198; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
1199; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1200; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
1201; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
1202; GISEL-NEXT:    v_mul_hi_u32 v15, v19, v1
1203; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0
1204; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1205; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
1206; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
1207; GISEL-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v4, v13, v[1:2]
1208; GISEL-NEXT:    v_sub_i32_e32 v14, vcc, v18, v0
1209; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v19, v13
1210; GISEL-NEXT:    v_subb_u32_e64 v15, s[4:5], v19, v13, vcc
1211; GISEL-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
1212; GISEL-NEXT:    v_sub_i32_e32 v16, vcc, v14, v4
1213; GISEL-NEXT:    v_subbrev_u32_e32 v17, vcc, 0, v0, vcc
1214; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v16, v4
1215; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v14, v4
1216; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
1217; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v17
1218; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
1219; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v15
1220; GISEL-NEXT:    v_cndmask_b32_e32 v18, -1, v0, vcc
1221; GISEL-NEXT:    v_mov_b32_e32 v0, v5
1222; GISEL-NEXT:    v_cndmask_b32_e64 v13, -1, v1, s[4:5]
1223; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1]
1224; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v7, v[0:1]
1225; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v16, v4
1226; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v17, vcc
1227; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v18
1228; GISEL-NEXT:    v_mul_lo_u32 v18, v7, v0
1229; GISEL-NEXT:    v_cndmask_b32_e32 v16, v16, v1, vcc
1230; GISEL-NEXT:    v_cndmask_b32_e32 v5, v17, v5, vcc
1231; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v10, v18
1232; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1233; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v11
1234; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
1235; GISEL-NEXT:    v_mul_lo_u32 v11, v8, v0
1236; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
1237; GISEL-NEXT:    v_mul_hi_u32 v10, v7, v0
1238; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
1239; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1240; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
1241; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1242; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1243; GISEL-NEXT:    v_mul_hi_u32 v0, v8, v0
1244; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
1245; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1246; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
1247; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
1248; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v1
1249; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, v8, v0, vcc
1250; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v7, 0
1251; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
1252; GISEL-NEXT:    v_cndmask_b32_e32 v11, v15, v5, vcc
1253; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v6, v8, v[1:2]
1254; GISEL-NEXT:    v_xor_b32_e32 v1, v11, v9
1255; GISEL-NEXT:    v_ashrrev_i32_e32 v11, 31, v3
1256; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], s6, v7, v[5:6]
1257; GISEL-NEXT:    v_cndmask_b32_e32 v10, v14, v16, vcc
1258; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v11
1259; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v11, vcc
1260; GISEL-NEXT:    v_xor_b32_e32 v12, v2, v11
1261; GISEL-NEXT:    v_mul_lo_u32 v2, v8, v0
1262; GISEL-NEXT:    v_mul_lo_u32 v6, v7, v5
1263; GISEL-NEXT:    v_xor_b32_e32 v13, v3, v11
1264; GISEL-NEXT:    v_mul_hi_u32 v3, v7, v0
1265; GISEL-NEXT:    v_mul_hi_u32 v0, v8, v0
1266; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
1267; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1268; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
1269; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
1270; GISEL-NEXT:    v_mul_lo_u32 v3, v8, v5
1271; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
1272; GISEL-NEXT:    v_mul_hi_u32 v6, v7, v5
1273; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
1274; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
1275; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
1276; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1277; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
1278; GISEL-NEXT:    v_mul_hi_u32 v5, v8, v5
1279; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
1280; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
1281; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
1282; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
1283; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v7, v0
1284; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v8, v2, vcc
1285; GISEL-NEXT:    v_mul_lo_u32 v5, v13, v3
1286; GISEL-NEXT:    v_mul_lo_u32 v6, v12, v2
1287; GISEL-NEXT:    v_xor_b32_e32 v10, v10, v9
1288; GISEL-NEXT:    v_mul_hi_u32 v7, v12, v3
1289; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v10, v9
1290; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v9, vcc
1291; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
1292; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1293; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
1294; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
1295; GISEL-NEXT:    v_mul_lo_u32 v7, v13, v2
1296; GISEL-NEXT:    v_mul_hi_u32 v3, v13, v3
1297; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
1298; GISEL-NEXT:    v_mul_hi_u32 v6, v12, v2
1299; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
1300; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
1301; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
1302; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1303; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
1304; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
1305; GISEL-NEXT:    v_mul_hi_u32 v7, v13, v2
1306; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0
1307; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
1308; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
1309; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
1310; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4]
1311; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v12, v2
1312; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], v13, v5, vcc
1313; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v13, v5
1314; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
1315; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v2, v4
1316; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
1317; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v4
1318; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
1319; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
1320; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v4
1321; GISEL-NEXT:    v_cndmask_b32_e32 v8, -1, v8, vcc
1322; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v7, v4
1323; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
1324; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
1325; GISEL-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v5, vcc
1326; GISEL-NEXT:    v_cndmask_b32_e64 v6, -1, v6, s[4:5]
1327; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
1328; GISEL-NEXT:    v_cndmask_b32_e32 v4, v7, v4, vcc
1329; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
1330; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
1331; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
1332; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
1333; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v11
1334; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v11
1335; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v11
1336; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v11, vcc
1337; GISEL-NEXT:    s_setpc_b64 s[30:31]
1338;
1339; CGP-LABEL: v_srem_v2i64_pow2k_denom:
1340; CGP:       ; %bb.0:
1341; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1342; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0x1000
1343; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
1344; CGP-NEXT:    v_mov_b32_e32 v6, 0xfffff000
1345; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
1346; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
1347; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
1348; CGP-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
1349; CGP-NEXT:    v_trunc_f32_e32 v8, v5
1350; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v8
1351; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v4
1352; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v8
1353; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v7, 0
1354; CGP-NEXT:    v_mov_b32_e32 v9, v5
1355; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v6, v8, v[9:10]
1356; CGP-NEXT:    v_mul_hi_u32 v11, v7, v4
1357; CGP-NEXT:    v_mul_hi_u32 v12, v8, v4
1358; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], -1, v7, v[9:10]
1359; CGP-NEXT:    v_mul_lo_u32 v10, v8, v4
1360; CGP-NEXT:    v_mul_lo_u32 v4, v7, v9
1361; CGP-NEXT:    v_mul_lo_u32 v13, v8, v9
1362; CGP-NEXT:    v_mul_hi_u32 v14, v7, v9
1363; CGP-NEXT:    v_mul_hi_u32 v9, v8, v9
1364; CGP-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
1365; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1366; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
1367; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1368; CGP-NEXT:    v_add_i32_e32 v4, vcc, v15, v4
1369; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v12
1370; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1371; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
1372; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1373; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
1374; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
1375; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1376; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
1377; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v13
1378; CGP-NEXT:    v_add_i32_e32 v16, vcc, v7, v4
1379; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0
1380; CGP-NEXT:    v_addc_u32_e32 v17, vcc, v8, v9, vcc
1381; CGP-NEXT:    v_mov_b32_e32 v4, v14
1382; CGP-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5]
1383; CGP-NEXT:    v_mul_lo_u32 v4, v17, v13
1384; CGP-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], -1, v16, v[14:15]
1385; CGP-NEXT:    v_mul_lo_u32 v9, v16, v14
1386; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
1387; CGP-NEXT:    v_mul_hi_u32 v9, v16, v13
1388; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1389; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
1390; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1391; CGP-NEXT:    v_mul_hi_u32 v9, v17, v13
1392; CGP-NEXT:    v_mul_lo_u32 v13, v17, v14
1393; CGP-NEXT:    v_add_i32_e32 v4, vcc, v15, v4
1394; CGP-NEXT:    v_mul_hi_u32 v15, v16, v14
1395; CGP-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
1396; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1397; CGP-NEXT:    v_add_i32_e32 v15, vcc, v9, v15
1398; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1399; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v9
1400; CGP-NEXT:    v_ashrrev_i32_e32 v9, 31, v1
1401; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
1402; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v9, vcc
1403; CGP-NEXT:    v_xor_b32_e32 v18, v0, v9
1404; CGP-NEXT:    v_add_i32_e32 v0, vcc, v15, v4
1405; CGP-NEXT:    v_mul_hi_u32 v4, v17, v14
1406; CGP-NEXT:    v_xor_b32_e32 v19, v1, v9
1407; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
1408; CGP-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
1409; CGP-NEXT:    v_add_i32_e32 v1, vcc, v4, v1
1410; CGP-NEXT:    v_add_i32_e32 v0, vcc, v16, v0
1411; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v17, v1, vcc
1412; CGP-NEXT:    v_mul_lo_u32 v13, v19, v0
1413; CGP-NEXT:    v_mul_lo_u32 v14, v18, v1
1414; CGP-NEXT:    v_mul_hi_u32 v15, v18, v0
1415; CGP-NEXT:    v_mul_hi_u32 v0, v19, v0
1416; CGP-NEXT:    v_mov_b32_e32 v4, 0x1000
1417; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
1418; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1419; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
1420; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1421; CGP-NEXT:    v_mul_lo_u32 v15, v19, v1
1422; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
1423; CGP-NEXT:    v_mul_hi_u32 v14, v18, v1
1424; CGP-NEXT:    v_add_i32_e32 v0, vcc, v15, v0
1425; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1426; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
1427; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1428; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
1429; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
1430; CGP-NEXT:    v_mul_hi_u32 v15, v19, v1
1431; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0
1432; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1433; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
1434; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
1435; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v4, v13, v[1:2]
1436; CGP-NEXT:    v_sub_i32_e32 v14, vcc, v18, v0
1437; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v19, v13
1438; CGP-NEXT:    v_subb_u32_e64 v15, s[4:5], v19, v13, vcc
1439; CGP-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
1440; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v14, v4
1441; CGP-NEXT:    v_sub_i32_e32 v16, vcc, v14, v4
1442; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
1443; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v15
1444; CGP-NEXT:    v_subbrev_u32_e32 v17, vcc, 0, v0, vcc
1445; CGP-NEXT:    v_mov_b32_e32 v0, v5
1446; CGP-NEXT:    v_cndmask_b32_e64 v13, -1, v1, s[4:5]
1447; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1]
1448; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v16, v4
1449; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, -1, vcc
1450; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], -1, v7, v[0:1]
1451; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v17
1452; CGP-NEXT:    v_cndmask_b32_e32 v5, -1, v18, vcc
1453; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v16, v4
1454; CGP-NEXT:    v_subbrev_u32_e32 v18, vcc, 0, v17, vcc
1455; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
1456; CGP-NEXT:    v_mul_lo_u32 v5, v7, v0
1457; CGP-NEXT:    v_cndmask_b32_e32 v16, v16, v1, vcc
1458; CGP-NEXT:    v_cndmask_b32_e32 v17, v17, v18, vcc
1459; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v5
1460; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
1461; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v11
1462; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
1463; CGP-NEXT:    v_mul_lo_u32 v10, v8, v0
1464; CGP-NEXT:    v_add_i32_e32 v1, vcc, v5, v1
1465; CGP-NEXT:    v_mul_hi_u32 v5, v7, v0
1466; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
1467; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1468; CGP-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
1469; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1470; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
1471; CGP-NEXT:    v_mul_hi_u32 v0, v8, v0
1472; CGP-NEXT:    v_add_i32_e32 v1, vcc, v5, v1
1473; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
1474; CGP-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
1475; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v5
1476; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v1
1477; CGP-NEXT:    v_addc_u32_e32 v8, vcc, v8, v0, vcc
1478; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v7, 0
1479; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
1480; CGP-NEXT:    v_cndmask_b32_e32 v5, v14, v16, vcc
1481; CGP-NEXT:    v_xor_b32_e32 v11, v5, v9
1482; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v6, v8, v[1:2]
1483; CGP-NEXT:    v_cndmask_b32_e32 v10, v15, v17, vcc
1484; CGP-NEXT:    v_xor_b32_e32 v1, v10, v9
1485; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], -1, v7, v[5:6]
1486; CGP-NEXT:    v_ashrrev_i32_e32 v10, 31, v3
1487; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
1488; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v10, vcc
1489; CGP-NEXT:    v_xor_b32_e32 v12, v2, v10
1490; CGP-NEXT:    v_mul_lo_u32 v2, v8, v0
1491; CGP-NEXT:    v_mul_lo_u32 v6, v7, v5
1492; CGP-NEXT:    v_xor_b32_e32 v13, v3, v10
1493; CGP-NEXT:    v_mul_hi_u32 v3, v7, v0
1494; CGP-NEXT:    v_mul_hi_u32 v0, v8, v0
1495; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
1496; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1497; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
1498; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
1499; CGP-NEXT:    v_mul_lo_u32 v3, v8, v5
1500; CGP-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
1501; CGP-NEXT:    v_mul_hi_u32 v6, v7, v5
1502; CGP-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
1503; CGP-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
1504; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
1505; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1506; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
1507; CGP-NEXT:    v_mul_hi_u32 v5, v8, v5
1508; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
1509; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
1510; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
1511; CGP-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
1512; CGP-NEXT:    v_add_i32_e32 v3, vcc, v7, v0
1513; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v8, v2, vcc
1514; CGP-NEXT:    v_mul_lo_u32 v5, v13, v3
1515; CGP-NEXT:    v_mul_lo_u32 v6, v12, v2
1516; CGP-NEXT:    v_mul_hi_u32 v7, v12, v3
1517; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v11, v9
1518; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v1, v9, vcc
1519; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
1520; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1521; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
1522; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
1523; CGP-NEXT:    v_mul_lo_u32 v7, v13, v2
1524; CGP-NEXT:    v_mul_hi_u32 v3, v13, v3
1525; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
1526; CGP-NEXT:    v_mul_hi_u32 v6, v12, v2
1527; CGP-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
1528; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
1529; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
1530; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1531; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
1532; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
1533; CGP-NEXT:    v_mul_hi_u32 v7, v13, v2
1534; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0
1535; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
1536; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
1537; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
1538; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4]
1539; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v12, v2
1540; CGP-NEXT:    v_subb_u32_e64 v3, s[4:5], v13, v5, vcc
1541; CGP-NEXT:    v_sub_i32_e64 v5, s[4:5], v13, v5
1542; CGP-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
1543; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v2, v4
1544; CGP-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
1545; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v4
1546; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
1547; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
1548; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v4
1549; CGP-NEXT:    v_cndmask_b32_e32 v8, -1, v8, vcc
1550; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v7, v4
1551; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
1552; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
1553; CGP-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v5, vcc
1554; CGP-NEXT:    v_cndmask_b32_e64 v6, -1, v6, s[4:5]
1555; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
1556; CGP-NEXT:    v_cndmask_b32_e32 v4, v7, v4, vcc
1557; CGP-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
1558; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
1559; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
1560; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
1561; CGP-NEXT:    v_xor_b32_e32 v2, v2, v10
1562; CGP-NEXT:    v_xor_b32_e32 v3, v3, v10
1563; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
1564; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v10, vcc
1565; CGP-NEXT:    s_setpc_b64 s[30:31]
1566  %result = srem <2 x i64> %num, <i64 4096, i64 4096>
1567  ret <2 x i64> %result
1568}
1569
; Scalar i64 srem of %num by the odd constant 1235195 (0x12d8fb in the checks;
; 0xffed2705 is its 32-bit negation).
; The assertions use the shared CHECK prefix, so both RUN configurations
; (GISEL and CGP) are expected to emit this same instruction sequence.
; The expected code contains no divide instruction: it is built around
; v_rcp_iflag_f32 of the constant followed by integer multiply/add steps and
; compare/select corrections against 0x12d8fb.
; v6 = v1 >> 31 (arithmetic) is added and xor'ed into v[0:1] up front and
; xor'ed/subtracted back out at the end -- presumably the sign handling for
; %num; confirm against the expansion in the legalizer.
; NOTE: these assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
1570define i64 @v_srem_i64_oddk_denom(i64 %num) {
1571; CHECK-LABEL: v_srem_i64_oddk_denom:
1572; CHECK:       ; %bb.0:
1573; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1574; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0x12d8fb
1575; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v3, 0
1576; CHECK-NEXT:    v_mov_b32_e32 v6, 0xffed2705
1577; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
1578; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
1579; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
1580; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
1581; CHECK-NEXT:    v_trunc_f32_e32 v4, v3
1582; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v4
1583; CHECK-NEXT:    v_cvt_u32_f32_e32 v5, v2
1584; CHECK-NEXT:    v_cvt_u32_f32_e32 v7, v4
1585; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, v5, 0
1586; CHECK-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v6, v7, v[3:4]
1587; CHECK-NEXT:    v_mul_hi_u32 v8, v5, v2
1588; CHECK-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], -1, v5, v[3:4]
1589; CHECK-NEXT:    v_mul_lo_u32 v4, v7, v2
1590; CHECK-NEXT:    v_mul_hi_u32 v2, v7, v2
1591; CHECK-NEXT:    v_mul_lo_u32 v9, v5, v3
1592; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v3
1593; CHECK-NEXT:    v_mul_hi_u32 v11, v5, v3
1594; CHECK-NEXT:    v_mul_hi_u32 v3, v7, v3
1595; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
1596; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1597; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v10, v2
1598; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1599; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
1600; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1601; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
1602; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v11
1603; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1604; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
1605; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
1606; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1607; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
1608; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
1609; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v2
1610; CHECK-NEXT:    v_addc_u32_e32 v7, vcc, v7, v3, vcc
1611; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, v5, 0
1612; CHECK-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v6, v7, v[3:4]
1613; CHECK-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
1614; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
1615; CHECK-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], -1, v5, v[3:4]
1616; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v1, v6, vcc
1617; CHECK-NEXT:    v_xor_b32_e32 v4, v0, v6
1618; CHECK-NEXT:    v_mul_lo_u32 v0, v7, v2
1619; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v3
1620; CHECK-NEXT:    v_xor_b32_e32 v9, v1, v6
1621; CHECK-NEXT:    v_mul_hi_u32 v1, v5, v2
1622; CHECK-NEXT:    v_mul_hi_u32 v2, v7, v2
1623; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
1624; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1625; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
1626; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
1627; CHECK-NEXT:    v_mul_lo_u32 v1, v7, v3
1628; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
1629; CHECK-NEXT:    v_mul_hi_u32 v8, v5, v3
1630; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
1631; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
1632; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
1633; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1634; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v8
1635; CHECK-NEXT:    v_mul_hi_u32 v3, v7, v3
1636; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
1637; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
1638; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
1639; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v3, v1
1640; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
1641; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v7, v1, vcc
1642; CHECK-NEXT:    v_mul_lo_u32 v2, v9, v0
1643; CHECK-NEXT:    v_mul_lo_u32 v3, v4, v1
1644; CHECK-NEXT:    v_mul_hi_u32 v7, v4, v0
1645; CHECK-NEXT:    v_mul_hi_u32 v0, v9, v0
1646; CHECK-NEXT:    v_mov_b32_e32 v5, 0x12d8fb
1647; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
1648; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
1649; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
1650; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
1651; CHECK-NEXT:    v_mul_lo_u32 v7, v9, v1
1652; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
1653; CHECK-NEXT:    v_mul_hi_u32 v3, v4, v1
1654; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
1655; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
1656; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
1657; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
1658; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
1659; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
1660; CHECK-NEXT:    v_mul_hi_u32 v7, v9, v1
1661; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v0, 0
1662; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
1663; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
1664; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
1665; CHECK-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v5, v2, v[1:2]
1666; CHECK-NEXT:    v_sub_i32_e64 v0, s[4:5], v4, v0
1667; CHECK-NEXT:    v_subb_u32_e64 v2, vcc, v9, v1, s[4:5]
1668; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v9, v1
1669; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
1670; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
1671; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
1672; CHECK-NEXT:    v_cndmask_b32_e32 v3, -1, v3, vcc
1673; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, v0, v5
1674; CHECK-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
1675; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1676; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v5
1677; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
1678; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
1679; CHECK-NEXT:    v_cndmask_b32_e32 v5, -1, v5, vcc
1680; CHECK-NEXT:    v_subrev_i32_e32 v7, vcc, 0x12d8fb, v4
1681; CHECK-NEXT:    v_subbrev_u32_e32 v8, vcc, 0, v1, vcc
1682; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
1683; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
1684; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
1685; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
1686; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1687; CHECK-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1688; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v6
1689; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v6
1690; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
1691; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v6, vcc
1692; CHECK-NEXT:    s_setpc_b64 s[30:31]
1693  %result = srem i64 %num, 1235195
1694  ret i64 %result
1695}
1696
1697define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) {
1698; GISEL-LABEL: v_srem_v2i64_oddk_denom:
1699; GISEL:       ; %bb.0:
1700; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1701; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, 0x12d8fb
1702; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
1703; GISEL-NEXT:    v_mov_b32_e32 v6, 0xffed2705
1704; GISEL-NEXT:    s_mov_b32 s6, 1
1705; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
1706; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
1707; GISEL-NEXT:    s_cmp_lg_u32 s6, 0
1708; GISEL-NEXT:    s_subb_u32 s6, 0, 0
1709; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
1710; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
1711; GISEL-NEXT:    v_trunc_f32_e32 v8, v5
1712; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v8
1713; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v4
1714; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
1715; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v7, 0
1716; GISEL-NEXT:    v_mov_b32_e32 v9, v5
1717; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v6, v8, v[9:10]
1718; GISEL-NEXT:    v_mul_hi_u32 v11, v7, v4
1719; GISEL-NEXT:    v_mul_hi_u32 v12, v8, v4
1720; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], s6, v7, v[9:10]
1721; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v4
1722; GISEL-NEXT:    v_mul_lo_u32 v13, v7, v9
1723; GISEL-NEXT:    v_mul_lo_u32 v4, v8, v9
1724; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v10, v13
1725; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1726; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v11
1727; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1728; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
1729; GISEL-NEXT:    v_mul_hi_u32 v14, v7, v9
1730; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
1731; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1732; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
1733; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1734; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
1735; GISEL-NEXT:    v_mul_hi_u32 v9, v8, v9
1736; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v13
1737; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1738; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
1739; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v13
1740; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v7, v4
1741; GISEL-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0
1742; GISEL-NEXT:    v_addc_u32_e32 v17, vcc, v8, v9, vcc
1743; GISEL-NEXT:    v_mov_b32_e32 v4, v14
1744; GISEL-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5]
1745; GISEL-NEXT:    v_mul_lo_u32 v4, v17, v13
1746; GISEL-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], s6, v16, v[14:15]
1747; GISEL-NEXT:    s_mov_b32 s6, 1
1748; GISEL-NEXT:    s_cmp_lg_u32 s6, 0
1749; GISEL-NEXT:    v_mul_lo_u32 v9, v16, v14
1750; GISEL-NEXT:    s_subb_u32 s6, 0, 0
1751; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
1752; GISEL-NEXT:    v_mul_hi_u32 v9, v16, v13
1753; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1754; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
1755; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1756; GISEL-NEXT:    v_mul_hi_u32 v9, v17, v13
1757; GISEL-NEXT:    v_mul_lo_u32 v13, v17, v14
1758; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v15, v4
1759; GISEL-NEXT:    v_mul_hi_u32 v15, v16, v14
1760; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
1761; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1762; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v9, v15
1763; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1764; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v9
1765; GISEL-NEXT:    v_ashrrev_i32_e32 v9, 31, v1
1766; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
1767; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v9, vcc
1768; GISEL-NEXT:    v_xor_b32_e32 v18, v0, v9
1769; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v15, v4
1770; GISEL-NEXT:    v_mul_hi_u32 v4, v17, v14
1771; GISEL-NEXT:    v_xor_b32_e32 v19, v1, v9
1772; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
1773; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
1774; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v4, v1
1775; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v16, v0
1776; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v17, v1, vcc
1777; GISEL-NEXT:    v_mul_lo_u32 v13, v19, v0
1778; GISEL-NEXT:    v_mul_lo_u32 v14, v18, v1
1779; GISEL-NEXT:    v_mul_hi_u32 v15, v18, v0
1780; GISEL-NEXT:    v_mul_hi_u32 v0, v19, v0
1781; GISEL-NEXT:    v_mov_b32_e32 v4, 0x12d8fb
1782; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
1783; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1784; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
1785; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1786; GISEL-NEXT:    v_mul_lo_u32 v15, v19, v1
1787; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
1788; GISEL-NEXT:    v_mul_hi_u32 v14, v18, v1
1789; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v15, v0
1790; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1791; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
1792; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1793; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
1794; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
1795; GISEL-NEXT:    v_mul_hi_u32 v15, v19, v1
1796; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0
1797; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1798; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
1799; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
1800; GISEL-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v4, v13, v[1:2]
1801; GISEL-NEXT:    v_sub_i32_e32 v14, vcc, v18, v0
1802; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v19, v13
1803; GISEL-NEXT:    v_subb_u32_e64 v15, s[4:5], v19, v13, vcc
1804; GISEL-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
1805; GISEL-NEXT:    v_sub_i32_e32 v16, vcc, v14, v4
1806; GISEL-NEXT:    v_subbrev_u32_e32 v17, vcc, 0, v0, vcc
1807; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v16, v4
1808; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v14, v4
1809; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
1810; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v17
1811; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
1812; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v15
1813; GISEL-NEXT:    v_cndmask_b32_e32 v18, -1, v0, vcc
1814; GISEL-NEXT:    v_mov_b32_e32 v0, v5
1815; GISEL-NEXT:    v_cndmask_b32_e64 v13, -1, v1, s[4:5]
1816; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1]
1817; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s6, v7, v[0:1]
1818; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v16, v4
1819; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v17, vcc
1820; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v18
1821; GISEL-NEXT:    v_mul_lo_u32 v18, v7, v0
1822; GISEL-NEXT:    v_cndmask_b32_e32 v16, v16, v1, vcc
1823; GISEL-NEXT:    v_cndmask_b32_e32 v5, v17, v5, vcc
1824; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v10, v18
1825; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1826; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v11
1827; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
1828; GISEL-NEXT:    v_mul_lo_u32 v11, v8, v0
1829; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
1830; GISEL-NEXT:    v_mul_hi_u32 v10, v7, v0
1831; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
1832; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1833; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
1834; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1835; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1836; GISEL-NEXT:    v_mul_hi_u32 v0, v8, v0
1837; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
1838; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1839; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
1840; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
1841; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v1
1842; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, v8, v0, vcc
1843; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v7, 0
1844; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
1845; GISEL-NEXT:    v_cndmask_b32_e32 v11, v15, v5, vcc
1846; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v6, v8, v[1:2]
1847; GISEL-NEXT:    v_xor_b32_e32 v1, v11, v9
1848; GISEL-NEXT:    v_ashrrev_i32_e32 v11, 31, v3
1849; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], s6, v7, v[5:6]
1850; GISEL-NEXT:    v_cndmask_b32_e32 v10, v14, v16, vcc
1851; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v11
1852; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v11, vcc
1853; GISEL-NEXT:    v_xor_b32_e32 v12, v2, v11
1854; GISEL-NEXT:    v_mul_lo_u32 v2, v8, v0
1855; GISEL-NEXT:    v_mul_lo_u32 v6, v7, v5
1856; GISEL-NEXT:    v_xor_b32_e32 v13, v3, v11
1857; GISEL-NEXT:    v_mul_hi_u32 v3, v7, v0
1858; GISEL-NEXT:    v_mul_hi_u32 v0, v8, v0
1859; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
1860; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1861; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
1862; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
1863; GISEL-NEXT:    v_mul_lo_u32 v3, v8, v5
1864; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
1865; GISEL-NEXT:    v_mul_hi_u32 v6, v7, v5
1866; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
1867; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
1868; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
1869; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1870; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
1871; GISEL-NEXT:    v_mul_hi_u32 v5, v8, v5
1872; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
1873; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
1874; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
1875; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
1876; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v7, v0
1877; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v8, v2, vcc
1878; GISEL-NEXT:    v_mul_lo_u32 v5, v13, v3
1879; GISEL-NEXT:    v_mul_lo_u32 v6, v12, v2
1880; GISEL-NEXT:    v_xor_b32_e32 v10, v10, v9
1881; GISEL-NEXT:    v_mul_hi_u32 v7, v12, v3
1882; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v10, v9
1883; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v9, vcc
1884; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
1885; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1886; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
1887; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
1888; GISEL-NEXT:    v_mul_lo_u32 v7, v13, v2
1889; GISEL-NEXT:    v_mul_hi_u32 v3, v13, v3
1890; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
1891; GISEL-NEXT:    v_mul_hi_u32 v6, v12, v2
1892; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
1893; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
1894; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
1895; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1896; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
1897; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
1898; GISEL-NEXT:    v_mul_hi_u32 v7, v13, v2
1899; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0
1900; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
1901; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
1902; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
1903; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4]
1904; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v12, v2
1905; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], v13, v5, vcc
1906; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v13, v5
1907; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
1908; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v2, v4
1909; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
1910; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v4
1911; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
1912; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
1913; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v4
1914; GISEL-NEXT:    v_cndmask_b32_e32 v8, -1, v8, vcc
1915; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v7, v4
1916; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
1917; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
1918; GISEL-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v5, vcc
1919; GISEL-NEXT:    v_cndmask_b32_e64 v6, -1, v6, s[4:5]
1920; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
1921; GISEL-NEXT:    v_cndmask_b32_e32 v4, v7, v4, vcc
1922; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
1923; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
1924; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
1925; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
1926; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v11
1927; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v11
1928; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v11
1929; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v11, vcc
1930; GISEL-NEXT:    s_setpc_b64 s[30:31]
1931;
1932; CGP-LABEL: v_srem_v2i64_oddk_denom:
1933; CGP:       ; %bb.0:
1934; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1935; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0x12d8fb
1936; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
1937; CGP-NEXT:    v_mov_b32_e32 v6, 0xffed2705
1938; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
1939; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
1940; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
1941; CGP-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
1942; CGP-NEXT:    v_trunc_f32_e32 v8, v5
1943; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v8
1944; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v4
1945; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v8
1946; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v7, 0
1947; CGP-NEXT:    v_mov_b32_e32 v9, v5
1948; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v6, v8, v[9:10]
1949; CGP-NEXT:    v_mul_hi_u32 v11, v7, v4
1950; CGP-NEXT:    v_mul_hi_u32 v12, v8, v4
1951; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], -1, v7, v[9:10]
1952; CGP-NEXT:    v_mul_lo_u32 v10, v8, v4
1953; CGP-NEXT:    v_mul_lo_u32 v4, v7, v9
1954; CGP-NEXT:    v_mul_lo_u32 v13, v8, v9
1955; CGP-NEXT:    v_mul_hi_u32 v14, v7, v9
1956; CGP-NEXT:    v_mul_hi_u32 v9, v8, v9
1957; CGP-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
1958; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1959; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
1960; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1961; CGP-NEXT:    v_add_i32_e32 v4, vcc, v15, v4
1962; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v12
1963; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1964; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
1965; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1966; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
1967; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
1968; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1969; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
1970; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v13
1971; CGP-NEXT:    v_add_i32_e32 v16, vcc, v7, v4
1972; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0
1973; CGP-NEXT:    v_addc_u32_e32 v17, vcc, v8, v9, vcc
1974; CGP-NEXT:    v_mov_b32_e32 v4, v14
1975; CGP-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5]
1976; CGP-NEXT:    v_mul_lo_u32 v4, v17, v13
1977; CGP-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], -1, v16, v[14:15]
1978; CGP-NEXT:    v_mul_lo_u32 v9, v16, v14
1979; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
1980; CGP-NEXT:    v_mul_hi_u32 v9, v16, v13
1981; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1982; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
1983; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1984; CGP-NEXT:    v_mul_hi_u32 v9, v17, v13
1985; CGP-NEXT:    v_mul_lo_u32 v13, v17, v14
1986; CGP-NEXT:    v_add_i32_e32 v4, vcc, v15, v4
1987; CGP-NEXT:    v_mul_hi_u32 v15, v16, v14
1988; CGP-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
1989; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1990; CGP-NEXT:    v_add_i32_e32 v15, vcc, v9, v15
1991; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1992; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v9
1993; CGP-NEXT:    v_ashrrev_i32_e32 v9, 31, v1
1994; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
1995; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v9, vcc
1996; CGP-NEXT:    v_xor_b32_e32 v18, v0, v9
1997; CGP-NEXT:    v_add_i32_e32 v0, vcc, v15, v4
1998; CGP-NEXT:    v_mul_hi_u32 v4, v17, v14
1999; CGP-NEXT:    v_xor_b32_e32 v19, v1, v9
2000; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
2001; CGP-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
2002; CGP-NEXT:    v_add_i32_e32 v1, vcc, v4, v1
2003; CGP-NEXT:    v_add_i32_e32 v0, vcc, v16, v0
2004; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v17, v1, vcc
2005; CGP-NEXT:    v_mul_lo_u32 v13, v19, v0
2006; CGP-NEXT:    v_mul_lo_u32 v14, v18, v1
2007; CGP-NEXT:    v_mul_hi_u32 v15, v18, v0
2008; CGP-NEXT:    v_mul_hi_u32 v0, v19, v0
2009; CGP-NEXT:    v_mov_b32_e32 v4, 0x12d8fb
2010; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
2011; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2012; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
2013; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
2014; CGP-NEXT:    v_mul_lo_u32 v15, v19, v1
2015; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
2016; CGP-NEXT:    v_mul_hi_u32 v14, v18, v1
2017; CGP-NEXT:    v_add_i32_e32 v0, vcc, v15, v0
2018; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
2019; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
2020; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2021; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
2022; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
2023; CGP-NEXT:    v_mul_hi_u32 v15, v19, v1
2024; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0
2025; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
2026; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
2027; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
2028; CGP-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v4, v13, v[1:2]
2029; CGP-NEXT:    v_sub_i32_e32 v14, vcc, v18, v0
2030; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v19, v13
2031; CGP-NEXT:    v_subb_u32_e64 v15, s[4:5], v19, v13, vcc
2032; CGP-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
2033; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v14, v4
2034; CGP-NEXT:    v_sub_i32_e32 v16, vcc, v14, v4
2035; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[4:5]
2036; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v15
2037; CGP-NEXT:    v_subbrev_u32_e32 v17, vcc, 0, v0, vcc
2038; CGP-NEXT:    v_mov_b32_e32 v0, v5
2039; CGP-NEXT:    v_cndmask_b32_e64 v13, -1, v1, s[4:5]
2040; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1]
2041; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v16, v4
2042; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, -1, vcc
2043; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], -1, v7, v[0:1]
2044; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v17
2045; CGP-NEXT:    v_cndmask_b32_e32 v5, -1, v18, vcc
2046; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v16, v4
2047; CGP-NEXT:    v_subbrev_u32_e32 v18, vcc, 0, v17, vcc
2048; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
2049; CGP-NEXT:    v_mul_lo_u32 v5, v7, v0
2050; CGP-NEXT:    v_cndmask_b32_e32 v16, v16, v1, vcc
2051; CGP-NEXT:    v_cndmask_b32_e32 v17, v17, v18, vcc
2052; CGP-NEXT:    v_add_i32_e32 v1, vcc, v10, v5
2053; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
2054; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v11
2055; CGP-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
2056; CGP-NEXT:    v_mul_lo_u32 v10, v8, v0
2057; CGP-NEXT:    v_add_i32_e32 v1, vcc, v5, v1
2058; CGP-NEXT:    v_mul_hi_u32 v5, v7, v0
2059; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
2060; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
2061; CGP-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
2062; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2063; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
2064; CGP-NEXT:    v_mul_hi_u32 v0, v8, v0
2065; CGP-NEXT:    v_add_i32_e32 v1, vcc, v5, v1
2066; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
2067; CGP-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
2068; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v5
2069; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v1
2070; CGP-NEXT:    v_addc_u32_e32 v8, vcc, v8, v0, vcc
2071; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v7, 0
2072; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
2073; CGP-NEXT:    v_cndmask_b32_e32 v5, v14, v16, vcc
2074; CGP-NEXT:    v_xor_b32_e32 v11, v5, v9
2075; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v6, v8, v[1:2]
2076; CGP-NEXT:    v_cndmask_b32_e32 v10, v15, v17, vcc
2077; CGP-NEXT:    v_xor_b32_e32 v1, v10, v9
2078; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], -1, v7, v[5:6]
2079; CGP-NEXT:    v_ashrrev_i32_e32 v10, 31, v3
2080; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
2081; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v3, v10, vcc
2082; CGP-NEXT:    v_xor_b32_e32 v12, v2, v10
2083; CGP-NEXT:    v_mul_lo_u32 v2, v8, v0
2084; CGP-NEXT:    v_mul_lo_u32 v6, v7, v5
2085; CGP-NEXT:    v_xor_b32_e32 v13, v3, v10
2086; CGP-NEXT:    v_mul_hi_u32 v3, v7, v0
2087; CGP-NEXT:    v_mul_hi_u32 v0, v8, v0
2088; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
2089; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2090; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
2091; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
2092; CGP-NEXT:    v_mul_lo_u32 v3, v8, v5
2093; CGP-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
2094; CGP-NEXT:    v_mul_hi_u32 v6, v7, v5
2095; CGP-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
2096; CGP-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
2097; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
2098; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2099; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
2100; CGP-NEXT:    v_mul_hi_u32 v5, v8, v5
2101; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
2102; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
2103; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
2104; CGP-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
2105; CGP-NEXT:    v_add_i32_e32 v3, vcc, v7, v0
2106; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v8, v2, vcc
2107; CGP-NEXT:    v_mul_lo_u32 v5, v13, v3
2108; CGP-NEXT:    v_mul_lo_u32 v6, v12, v2
2109; CGP-NEXT:    v_mul_hi_u32 v7, v12, v3
2110; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v11, v9
2111; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v1, v9, vcc
2112; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
2113; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2114; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
2115; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
2116; CGP-NEXT:    v_mul_lo_u32 v7, v13, v2
2117; CGP-NEXT:    v_mul_hi_u32 v3, v13, v3
2118; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
2119; CGP-NEXT:    v_mul_hi_u32 v6, v12, v2
2120; CGP-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
2121; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
2122; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
2123; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2124; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
2125; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
2126; CGP-NEXT:    v_mul_hi_u32 v7, v13, v2
2127; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0
2128; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
2129; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
2130; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
2131; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v4, v5, v[3:4]
2132; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v12, v2
2133; CGP-NEXT:    v_subb_u32_e64 v3, s[4:5], v13, v5, vcc
2134; CGP-NEXT:    v_sub_i32_e64 v5, s[4:5], v13, v5
2135; CGP-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
2136; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v2, v4
2137; CGP-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
2138; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v4
2139; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
2140; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
2141; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v4
2142; CGP-NEXT:    v_cndmask_b32_e32 v8, -1, v8, vcc
2143; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v7, v4
2144; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
2145; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
2146; CGP-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v5, vcc
2147; CGP-NEXT:    v_cndmask_b32_e64 v6, -1, v6, s[4:5]
2148; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
2149; CGP-NEXT:    v_cndmask_b32_e32 v4, v7, v4, vcc
2150; CGP-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
2151; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
2152; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2153; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
2154; CGP-NEXT:    v_xor_b32_e32 v2, v2, v10
2155; CGP-NEXT:    v_xor_b32_e32 v3, v3, v10
2156; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
2157; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v10, vcc
2158; CGP-NEXT:    s_setpc_b64 s[30:31]
2159  %result = srem <2 x i64> %num, <i64 1235195, i64 1235195>
2160  ret <2 x i64> %result
2161}
2162
; Signed 64-bit remainder of %x by a divisor computed at run time as
; 4096 shifted left by %y (see the IR at the end of this function).
; The assertions below use the prefix shared by both RUN lines rather than
; a per-configuration one.
;
; Shape of the expected code, as shown by the assertions:
;   - v4 (a copy of incoming v1) is OR'ed with v6 (upper dword of the
;     v_lshl_b64 result) and the 64-bit pair {0, that OR} is compared against
;     zero to pick a path per lane. Presumably v0/v1 carry %x on entry; that
;     follows from the calling convention, not from this test - confirm.
;   - .LBB7_3 handles lanes where the OR is non-zero and operates on both
;     dwords of the operands.
;   - .LBB7_4 handles the remaining lanes, reads only v3 and v5 (the low
;     dwords) and writes 0 to v1.
2163define i64 @v_srem_i64_pow2_shl_denom(i64 %x, i64 %y) {
2164; CHECK-LABEL: v_srem_i64_pow2_shl_denom:
2165; CHECK:       ; %bb.0:
2166; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2167; CHECK-NEXT:    v_mov_b32_e32 v3, v0
2168; CHECK-NEXT:    v_mov_b32_e32 v4, v1
2169; CHECK-NEXT:    v_mov_b32_e32 v0, 0x1000
2170; CHECK-NEXT:    v_mov_b32_e32 v1, 0
2171; CHECK-NEXT:    v_lshl_b64 v[5:6], v[0:1], v2
2172; CHECK-NEXT:    v_mov_b32_e32 v0, 0
2173; CHECK-NEXT:    v_or_b32_e32 v1, v4, v6
2174; CHECK-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
2175; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
2176; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
2177; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
2178; CHECK-NEXT:    s_cbranch_execnz .LBB7_3
2179; CHECK-NEXT:  ; %bb.1: ; %Flow
2180; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
2181; CHECK-NEXT:    s_cbranch_execnz .LBB7_4
2182; CHECK-NEXT:  .LBB7_2:
2183; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
2184; CHECK-NEXT:    s_setpc_b64 s[30:31]
2185; CHECK-NEXT:  .LBB7_3:
2186; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v6
2187; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v5, v1
2188; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v6, v1, vcc
2189; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
2190; CHECK-NEXT:    v_xor_b32_e32 v1, v2, v1
2191; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, v0
2192; CHECK-NEXT:    v_cvt_f32_u32_e32 v5, v1
2193; CHECK-NEXT:    v_sub_i32_e32 v9, vcc, 0, v0
2194; CHECK-NEXT:    v_subb_u32_e32 v10, vcc, 0, v1, vcc
2195; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v5
2196; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
2197; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
2198; CHECK-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v2
2199; CHECK-NEXT:    v_trunc_f32_e32 v7, v5
2200; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v7
2201; CHECK-NEXT:    v_cvt_u32_f32_e32 v8, v2
2202; CHECK-NEXT:    v_cvt_u32_f32_e32 v11, v7
2203; CHECK-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v9, v8, 0
2204; CHECK-NEXT:    v_mov_b32_e32 v2, v6
2205; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[2:3]
2206; CHECK-NEXT:    v_mul_lo_u32 v2, v11, v5
2207; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7]
2208; CHECK-NEXT:    v_mul_hi_u32 v7, v8, v5
2209; CHECK-NEXT:    v_mul_hi_u32 v5, v11, v5
2210; CHECK-NEXT:    v_mul_lo_u32 v12, v8, v6
2211; CHECK-NEXT:    v_mul_lo_u32 v13, v11, v6
2212; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v12
2213; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2214; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
2215; CHECK-NEXT:    v_mul_hi_u32 v7, v8, v6
2216; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
2217; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v12, v2
2218; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v13, v5
2219; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2220; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
2221; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
2222; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v12, v7
2223; CHECK-NEXT:    v_mul_hi_u32 v6, v11, v6
2224; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
2225; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
2226; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
2227; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
2228; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v2
2229; CHECK-NEXT:    v_addc_u32_e32 v11, vcc, v11, v5, vcc
2230; CHECK-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v9, v8, 0
2231; CHECK-NEXT:    v_mov_b32_e32 v2, v6
2232; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[2:3]
2233; CHECK-NEXT:    v_ashrrev_i32_e32 v9, 31, v4
2234; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v9
2235; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7]
2236; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v4, v9, vcc
2237; CHECK-NEXT:    v_xor_b32_e32 v7, v2, v9
2238; CHECK-NEXT:    v_mul_lo_u32 v2, v11, v5
2239; CHECK-NEXT:    v_mul_lo_u32 v4, v8, v6
2240; CHECK-NEXT:    v_xor_b32_e32 v10, v3, v9
2241; CHECK-NEXT:    v_mul_hi_u32 v3, v8, v5
2242; CHECK-NEXT:    v_mul_hi_u32 v5, v11, v5
2243; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
2244; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
2245; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
2246; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
2247; CHECK-NEXT:    v_mul_lo_u32 v3, v11, v6
2248; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
2249; CHECK-NEXT:    v_mul_hi_u32 v4, v8, v6
2250; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
2251; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
2252; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
2253; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
2254; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
2255; CHECK-NEXT:    v_mul_hi_u32 v5, v11, v6
2256; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
2257; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
2258; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
2259; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
2260; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v8, v2
2261; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v11, v3, vcc
2262; CHECK-NEXT:    v_mul_lo_u32 v4, v10, v2
2263; CHECK-NEXT:    v_mul_lo_u32 v5, v7, v3
2264; CHECK-NEXT:    v_mul_hi_u32 v6, v7, v2
2265; CHECK-NEXT:    v_mul_hi_u32 v2, v10, v2
2266; CHECK-NEXT:    v_mul_hi_u32 v8, v10, v3
2267; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
2268; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
2269; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
2270; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
2271; CHECK-NEXT:    v_mul_lo_u32 v6, v10, v3
2272; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
2273; CHECK-NEXT:    v_mul_hi_u32 v5, v7, v3
2274; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
2275; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2276; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
2277; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
2278; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
2279; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v2, v4
2280; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v0, v6, 0
2281; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
2282; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
2283; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
2284; CHECK-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v0, v4, v[3:4]
2285; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v7, v2
2286; CHECK-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v1, v6, v[3:4]
2287; CHECK-NEXT:    v_subb_u32_e64 v4, s[4:5], v10, v3, vcc
2288; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v10, v3
2289; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v1
2290; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
2291; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v0
2292; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
2293; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], v4, v1
2294; CHECK-NEXT:    v_subb_u32_e32 v3, vcc, v3, v1, vcc
2295; CHECK-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[4:5]
2296; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, v2, v0
2297; CHECK-NEXT:    v_subbrev_u32_e64 v7, s[4:5], 0, v3, vcc
2298; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v1
2299; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
2300; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v0
2301; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
2302; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], v7, v1
2303; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v3, v1, vcc
2304; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v6, v0
2305; CHECK-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s[4:5]
2306; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
2307; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
2308; CHECK-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
2309; CHECK-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
2310; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
2311; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
2312; CHECK-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
2313; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v9
2314; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v9
2315; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v9
2316; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v9, vcc
2317; CHECK-NEXT:    ; implicit-def: $vgpr5_vgpr6
2318; CHECK-NEXT:    ; implicit-def: $vgpr3
2319; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
2320; CHECK-NEXT:    s_cbranch_execz .LBB7_2
2321; CHECK-NEXT:  .LBB7_4:
2322; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, v5
2323; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, 0, v5
2324; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
2325; CHECK-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
2326; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
2327; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v0
2328; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
2329; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
2330; CHECK-NEXT:    v_mul_hi_u32 v0, v3, v0
2331; CHECK-NEXT:    v_mul_lo_u32 v0, v0, v5
2332; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v3, v0
2333; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v0, v5
2334; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
2335; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2336; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v0, v5
2337; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
2338; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2339; CHECK-NEXT:    v_mov_b32_e32 v1, 0
2340; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
2341; CHECK-NEXT:    s_setpc_b64 s[30:31]
2342  %shl.y = shl i64 4096, %y
2343  %r = srem i64 %x, %shl.y
2344  ret i64 %r
2345}
2346
2347define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
2348; GISEL-LABEL: v_srem_v2i64_pow2_shl_denom:
2349; GISEL:       ; %bb.0:
2350; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2351; GISEL-NEXT:    v_mov_b32_e32 v8, 0x1000
2352; GISEL-NEXT:    v_mov_b32_e32 v9, 0
2353; GISEL-NEXT:    v_lshl_b64 v[4:5], v[8:9], v4
2354; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v5
2355; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
2356; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, v5, v7, vcc
2357; GISEL-NEXT:    v_xor_b32_e32 v5, v4, v7
2358; GISEL-NEXT:    v_xor_b32_e32 v7, v10, v7
2359; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v5
2360; GISEL-NEXT:    v_cvt_f32_u32_e32 v10, v7
2361; GISEL-NEXT:    v_sub_i32_e32 v14, vcc, 0, v5
2362; GISEL-NEXT:    v_subb_u32_e32 v15, vcc, 0, v7, vcc
2363; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v10
2364; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
2365; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
2366; GISEL-NEXT:    v_mul_f32_e32 v10, 0x2f800000, v4
2367; GISEL-NEXT:    v_trunc_f32_e32 v12, v10
2368; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v12
2369; GISEL-NEXT:    v_cvt_u32_f32_e32 v13, v4
2370; GISEL-NEXT:    v_cvt_u32_f32_e32 v16, v12
2371; GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0
2372; GISEL-NEXT:    v_mov_b32_e32 v4, v11
2373; GISEL-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[4:5]
2374; GISEL-NEXT:    v_mul_lo_u32 v4, v16, v10
2375; GISEL-NEXT:    v_mul_hi_u32 v17, v13, v10
2376; GISEL-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12]
2377; GISEL-NEXT:    v_mul_hi_u32 v10, v16, v10
2378; GISEL-NEXT:    v_mul_lo_u32 v12, v13, v11
2379; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
2380; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2381; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v17
2382; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
2383; GISEL-NEXT:    v_mul_lo_u32 v17, v16, v11
2384; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v12, v4
2385; GISEL-NEXT:    v_mul_hi_u32 v12, v13, v11
2386; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v17, v10
2387; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
2388; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
2389; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2390; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v17, v12
2391; GISEL-NEXT:    v_mul_hi_u32 v11, v16, v11
2392; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
2393; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2394; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
2395; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
2396; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v4
2397; GISEL-NEXT:    v_addc_u32_e32 v16, vcc, v16, v10, vcc
2398; GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0
2399; GISEL-NEXT:    v_mov_b32_e32 v4, v11
2400; GISEL-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[4:5]
2401; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
2402; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
2403; GISEL-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12]
2404; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
2405; GISEL-NEXT:    v_xor_b32_e32 v12, v0, v4
2406; GISEL-NEXT:    v_mul_lo_u32 v0, v16, v10
2407; GISEL-NEXT:    v_mul_lo_u32 v14, v13, v11
2408; GISEL-NEXT:    v_xor_b32_e32 v15, v1, v4
2409; GISEL-NEXT:    v_mul_hi_u32 v1, v13, v10
2410; GISEL-NEXT:    v_mul_hi_u32 v10, v16, v10
2411; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
2412; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2413; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
2414; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
2415; GISEL-NEXT:    v_mul_lo_u32 v1, v16, v11
2416; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v14, v0
2417; GISEL-NEXT:    v_mul_hi_u32 v14, v13, v11
2418; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v10
2419; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2420; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v14
2421; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2422; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
2423; GISEL-NEXT:    v_mul_hi_u32 v11, v16, v11
2424; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
2425; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
2426; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
2427; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v11, v1
2428; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v0
2429; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, v16, v1, vcc
2430; GISEL-NEXT:    v_mul_lo_u32 v13, v15, v10
2431; GISEL-NEXT:    v_mul_lo_u32 v14, v12, v11
2432; GISEL-NEXT:    v_lshl_b64 v[0:1], v[8:9], v6
2433; GISEL-NEXT:    v_mul_hi_u32 v6, v12, v10
2434; GISEL-NEXT:    v_mul_hi_u32 v10, v15, v10
2435; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v13, v14
2436; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2437; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
2438; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2439; GISEL-NEXT:    v_mul_lo_u32 v8, v15, v11
2440; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v9, v6
2441; GISEL-NEXT:    v_mul_hi_u32 v9, v12, v11
2442; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
2443; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2444; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
2445; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2446; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
2447; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v8, v6
2448; GISEL-NEXT:    v_mul_hi_u32 v8, v15, v11
2449; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2450; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v9, v6
2451; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v8, v6
2452; GISEL-NEXT:    v_ashrrev_i32_e32 v8, 31, v1
2453; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
2454; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v1, v8, vcc
2455; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v5, v13, 0
2456; GISEL-NEXT:    v_xor_b32_e32 v6, v0, v8
2457; GISEL-NEXT:    v_xor_b32_e32 v8, v1, v8
2458; GISEL-NEXT:    v_cvt_f32_u32_e32 v14, v6
2459; GISEL-NEXT:    v_cvt_f32_u32_e32 v16, v8
2460; GISEL-NEXT:    v_mov_b32_e32 v0, v10
2461; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v11, v[0:1]
2462; GISEL-NEXT:    v_mac_f32_e32 v14, 0x4f800000, v16
2463; GISEL-NEXT:    v_rcp_iflag_f32_e32 v10, v14
2464; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v7, v13, v[0:1]
2465; GISEL-NEXT:    v_sub_i32_e32 v16, vcc, 0, v6
2466; GISEL-NEXT:    v_mul_f32_e32 v1, 0x5f7ffffc, v10
2467; GISEL-NEXT:    v_mul_f32_e32 v10, 0x2f800000, v1
2468; GISEL-NEXT:    v_trunc_f32_e32 v13, v10
2469; GISEL-NEXT:    v_mac_f32_e32 v1, 0xcf800000, v13
2470; GISEL-NEXT:    v_cvt_u32_f32_e32 v14, v1
2471; GISEL-NEXT:    v_cvt_u32_f32_e32 v13, v13
2472; GISEL-NEXT:    v_subb_u32_e32 v17, vcc, 0, v8, vcc
2473; GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v16, v14, 0
2474; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, v12, v9
2475; GISEL-NEXT:    v_mov_b32_e32 v1, v11
2476; GISEL-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v16, v13, v[1:2]
2477; GISEL-NEXT:    v_mul_lo_u32 v1, v13, v10
2478; GISEL-NEXT:    v_subb_u32_e64 v18, s[4:5], v15, v0, vcc
2479; GISEL-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v17, v14, v[11:12]
2480; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v15, v0
2481; GISEL-NEXT:    v_mul_lo_u32 v12, v14, v11
2482; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v18, v7
2483; GISEL-NEXT:    v_subb_u32_e32 v0, vcc, v0, v7, vcc
2484; GISEL-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v12
2485; GISEL-NEXT:    v_mul_hi_u32 v12, v14, v10
2486; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
2487; GISEL-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v12
2488; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[6:7]
2489; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v9, v5
2490; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[6:7]
2491; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], v18, v7
2492; GISEL-NEXT:    v_cndmask_b32_e64 v12, v1, v12, s[6:7]
2493; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v9, v5
2494; GISEL-NEXT:    v_subbrev_u32_e64 v19, s[6:7], 0, v0, vcc
2495; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v1, v5
2496; GISEL-NEXT:    v_cmp_ge_u32_e64 s[8:9], v19, v7
2497; GISEL-NEXT:    v_subb_u32_e32 v0, vcc, v0, v7, vcc
2498; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, -1, s[8:9]
2499; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, -1, s[6:7]
2500; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], v19, v7
2501; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v1, v5
2502; GISEL-NEXT:    v_cndmask_b32_e64 v20, v20, v21, s[6:7]
2503; GISEL-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
2504; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v20
2505; GISEL-NEXT:    v_cndmask_b32_e32 v5, v1, v5, vcc
2506; GISEL-NEXT:    v_cndmask_b32_e32 v7, v19, v0, vcc
2507; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2508; GISEL-NEXT:    v_mul_hi_u32 v1, v13, v10
2509; GISEL-NEXT:    v_mul_lo_u32 v10, v13, v11
2510; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v15, v0
2511; GISEL-NEXT:    v_mul_hi_u32 v15, v14, v11
2512; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
2513; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2514; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v15
2515; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
2516; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v15
2517; GISEL-NEXT:    v_mul_hi_u32 v11, v13, v11
2518; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
2519; GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
2520; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
2521; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v11, v1
2522; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v14, v0
2523; GISEL-NEXT:    v_addc_u32_e32 v13, vcc, v13, v1, vcc
2524; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v16, v11, 0
2525; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
2526; GISEL-NEXT:    v_cndmask_b32_e32 v5, v9, v5, vcc
2527; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v16, v13, v[1:2]
2528; GISEL-NEXT:    v_xor_b32_e32 v1, v5, v4
2529; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v3
2530; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v17, v11, v[9:10]
2531; GISEL-NEXT:    v_cndmask_b32_e32 v7, v18, v7, vcc
2532; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
2533; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
2534; GISEL-NEXT:    v_xor_b32_e32 v12, v2, v5
2535; GISEL-NEXT:    v_mul_lo_u32 v2, v13, v0
2536; GISEL-NEXT:    v_mul_lo_u32 v10, v11, v9
2537; GISEL-NEXT:    v_xor_b32_e32 v14, v3, v5
2538; GISEL-NEXT:    v_mul_hi_u32 v3, v11, v0
2539; GISEL-NEXT:    v_mul_hi_u32 v0, v13, v0
2540; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
2541; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2542; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
2543; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
2544; GISEL-NEXT:    v_mul_lo_u32 v3, v13, v9
2545; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v10, v2
2546; GISEL-NEXT:    v_mul_hi_u32 v10, v11, v9
2547; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
2548; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
2549; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
2550; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2551; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v10
2552; GISEL-NEXT:    v_mul_hi_u32 v9, v13, v9
2553; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
2554; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
2555; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
2556; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v9, v2
2557; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
2558; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v13, v2, vcc
2559; GISEL-NEXT:    v_mul_lo_u32 v3, v14, v0
2560; GISEL-NEXT:    v_mul_lo_u32 v9, v12, v2
2561; GISEL-NEXT:    v_mul_hi_u32 v10, v12, v0
2562; GISEL-NEXT:    v_mul_hi_u32 v0, v14, v0
2563; GISEL-NEXT:    v_xor_b32_e32 v7, v7, v4
2564; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v9
2565; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2566; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v10
2567; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
2568; GISEL-NEXT:    v_mul_lo_u32 v10, v14, v2
2569; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v9, v3
2570; GISEL-NEXT:    v_mul_hi_u32 v9, v12, v2
2571; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v10, v0
2572; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2573; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
2574; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2575; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
2576; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v0, v3
2577; GISEL-NEXT:    v_mul_hi_u32 v10, v14, v2
2578; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, v11, 0
2579; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
2580; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
2581; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v0
2582; GISEL-NEXT:    v_mov_b32_e32 v0, v3
2583; GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v6, v9, v[0:1]
2584; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v1, v4
2585; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v7, v4, vcc
2586; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v8, v11, v[9:10]
2587; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v12, v2
2588; GISEL-NEXT:    v_subb_u32_e64 v4, s[4:5], v14, v3, vcc
2589; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v14, v3
2590; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v8
2591; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
2592; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v6
2593; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
2594; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v4, v8
2595; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v8, vcc
2596; GISEL-NEXT:    v_cndmask_b32_e64 v7, v7, v9, s[4:5]
2597; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, v2, v6
2598; GISEL-NEXT:    v_subbrev_u32_e64 v10, s[4:5], 0, v3, vcc
2599; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v8
2600; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
2601; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v6
2602; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v8, vcc
2603; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
2604; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v10, v8
2605; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v9, v6
2606; GISEL-NEXT:    v_cndmask_b32_e64 v11, v11, v12, s[4:5]
2607; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
2608; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v11
2609; GISEL-NEXT:    v_cndmask_b32_e32 v6, v9, v6, vcc
2610; GISEL-NEXT:    v_cndmask_b32_e32 v3, v10, v3, vcc
2611; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
2612; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
2613; GISEL-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
2614; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v5
2615; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v5
2616; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v5
2617; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v5, vcc
2618; GISEL-NEXT:    s_setpc_b64 s[30:31]
2619;
2620; CGP-LABEL: v_srem_v2i64_pow2_shl_denom:
2621; CGP:       ; %bb.0:
2622; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2623; CGP-NEXT:    v_mov_b32_e32 v5, v2
2624; CGP-NEXT:    v_mov_b32_e32 v7, v3
2625; CGP-NEXT:    v_mov_b32_e32 v2, 0x1000
2626; CGP-NEXT:    v_mov_b32_e32 v3, 0
2627; CGP-NEXT:    v_lshl_b64 v[11:12], v[2:3], v4
2628; CGP-NEXT:    v_mov_b32_e32 v9, v1
2629; CGP-NEXT:    v_mov_b32_e32 v8, v0
2630; CGP-NEXT:    v_or_b32_e32 v1, v9, v12
2631; CGP-NEXT:    v_mov_b32_e32 v0, 0
2632; CGP-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
2633; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
2634; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
2635; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
2636; CGP-NEXT:    s_cbranch_execz .LBB8_2
2637; CGP-NEXT:  ; %bb.1:
2638; CGP-NEXT:    v_ashrrev_i32_e32 v1, 31, v12
2639; CGP-NEXT:    v_add_i32_e32 v0, vcc, v11, v1
2640; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v12, v1, vcc
2641; CGP-NEXT:    v_xor_b32_e32 v0, v0, v1
2642; CGP-NEXT:    v_xor_b32_e32 v1, v4, v1
2643; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v0
2644; CGP-NEXT:    v_cvt_f32_u32_e32 v10, v1
2645; CGP-NEXT:    v_sub_i32_e32 v14, vcc, 0, v0
2646; CGP-NEXT:    v_subb_u32_e32 v15, vcc, 0, v1, vcc
2647; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v10
2648; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
2649; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
2650; CGP-NEXT:    v_mul_f32_e32 v10, 0x2f800000, v4
2651; CGP-NEXT:    v_trunc_f32_e32 v12, v10
2652; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v12
2653; CGP-NEXT:    v_cvt_u32_f32_e32 v13, v4
2654; CGP-NEXT:    v_cvt_u32_f32_e32 v16, v12
2655; CGP-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0
2656; CGP-NEXT:    v_mov_b32_e32 v4, v11
2657; CGP-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[4:5]
2658; CGP-NEXT:    v_mul_lo_u32 v4, v16, v10
2659; CGP-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12]
2660; CGP-NEXT:    v_mul_hi_u32 v12, v13, v10
2661; CGP-NEXT:    v_mul_hi_u32 v10, v16, v10
2662; CGP-NEXT:    v_mul_lo_u32 v17, v13, v11
2663; CGP-NEXT:    v_mul_lo_u32 v18, v16, v11
2664; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v17
2665; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
2666; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
2667; CGP-NEXT:    v_mul_hi_u32 v12, v13, v11
2668; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
2669; CGP-NEXT:    v_add_i32_e32 v4, vcc, v17, v4
2670; CGP-NEXT:    v_add_i32_e32 v10, vcc, v18, v10
2671; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
2672; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
2673; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2674; CGP-NEXT:    v_add_i32_e32 v12, vcc, v17, v12
2675; CGP-NEXT:    v_mul_hi_u32 v11, v16, v11
2676; CGP-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
2677; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2678; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
2679; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
2680; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v4
2681; CGP-NEXT:    v_addc_u32_e32 v16, vcc, v16, v10, vcc
2682; CGP-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0
2683; CGP-NEXT:    v_mov_b32_e32 v4, v11
2684; CGP-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[4:5]
2685; CGP-NEXT:    v_ashrrev_i32_e32 v14, 31, v9
2686; CGP-NEXT:    v_add_i32_e32 v4, vcc, v8, v14
2687; CGP-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12]
2688; CGP-NEXT:    v_addc_u32_e32 v8, vcc, v9, v14, vcc
2689; CGP-NEXT:    v_xor_b32_e32 v12, v4, v14
2690; CGP-NEXT:    v_mul_lo_u32 v4, v16, v10
2691; CGP-NEXT:    v_mul_lo_u32 v9, v13, v11
2692; CGP-NEXT:    v_xor_b32_e32 v15, v8, v14
2693; CGP-NEXT:    v_mul_hi_u32 v8, v13, v10
2694; CGP-NEXT:    v_mul_hi_u32 v10, v16, v10
2695; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
2696; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2697; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
2698; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
2699; CGP-NEXT:    v_mul_lo_u32 v8, v16, v11
2700; CGP-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
2701; CGP-NEXT:    v_mul_hi_u32 v9, v13, v11
2702; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
2703; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2704; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
2705; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2706; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
2707; CGP-NEXT:    v_mul_hi_u32 v10, v16, v11
2708; CGP-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
2709; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
2710; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
2711; CGP-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
2712; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
2713; CGP-NEXT:    v_addc_u32_e32 v8, vcc, v16, v8, vcc
2714; CGP-NEXT:    v_mul_lo_u32 v9, v15, v4
2715; CGP-NEXT:    v_mul_lo_u32 v10, v12, v8
2716; CGP-NEXT:    v_mul_hi_u32 v11, v12, v4
2717; CGP-NEXT:    v_mul_hi_u32 v4, v15, v4
2718; CGP-NEXT:    v_mul_hi_u32 v13, v15, v8
2719; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
2720; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2721; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
2722; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2723; CGP-NEXT:    v_mul_lo_u32 v11, v15, v8
2724; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
2725; CGP-NEXT:    v_mul_hi_u32 v10, v12, v8
2726; CGP-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
2727; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
2728; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
2729; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2730; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
2731; CGP-NEXT:    v_add_i32_e32 v11, vcc, v4, v9
2732; CGP-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v0, v11, 0
2733; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
2734; CGP-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
2735; CGP-NEXT:    v_add_i32_e32 v10, vcc, v13, v4
2736; CGP-NEXT:    v_mov_b32_e32 v4, v9
2737; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v0, v10, v[4:5]
2738; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v12, v8
2739; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v1, v11, v[9:10]
2740; CGP-NEXT:    v_subb_u32_e64 v8, s[4:5], v15, v9, vcc
2741; CGP-NEXT:    v_sub_i32_e64 v9, s[4:5], v15, v9
2742; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v1
2743; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
2744; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v0
2745; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
2746; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v8, v1
2747; CGP-NEXT:    v_subb_u32_e32 v9, vcc, v9, v1, vcc
2748; CGP-NEXT:    v_cndmask_b32_e64 v10, v10, v11, s[4:5]
2749; CGP-NEXT:    v_sub_i32_e32 v11, vcc, v4, v0
2750; CGP-NEXT:    v_subbrev_u32_e64 v12, s[4:5], 0, v9, vcc
2751; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v12, v1
2752; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
2753; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v0
2754; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, -1, s[4:5]
2755; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v12, v1
2756; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v9, v1, vcc
2757; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v11, v0
2758; CGP-NEXT:    v_cndmask_b32_e64 v13, v13, v15, s[4:5]
2759; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
2760; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
2761; CGP-NEXT:    v_cndmask_b32_e32 v0, v11, v0, vcc
2762; CGP-NEXT:    v_cndmask_b32_e32 v1, v12, v1, vcc
2763; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
2764; CGP-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
2765; CGP-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
2766; CGP-NEXT:    v_xor_b32_e32 v0, v0, v14
2767; CGP-NEXT:    v_xor_b32_e32 v1, v1, v14
2768; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v14
2769; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v1, v14, vcc
2770; CGP-NEXT:    ; implicit-def: $vgpr11_vgpr12
2771; CGP-NEXT:    ; implicit-def: $vgpr8
2772; CGP-NEXT:  .LBB8_2: ; %Flow1
2773; CGP-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
2774; CGP-NEXT:    v_lshl_b64 v[9:10], v[2:3], v6
2775; CGP-NEXT:    s_xor_b64 exec, exec, s[4:5]
2776; CGP-NEXT:    s_cbranch_execz .LBB8_4
2777; CGP-NEXT:  ; %bb.3:
2778; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v11
2779; CGP-NEXT:    v_sub_i32_e32 v1, vcc, 0, v11
2780; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
2781; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
2782; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
2783; CGP-NEXT:    v_mul_lo_u32 v1, v1, v0
2784; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
2785; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
2786; CGP-NEXT:    v_mul_hi_u32 v0, v8, v0
2787; CGP-NEXT:    v_mul_lo_u32 v0, v0, v11
2788; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v8, v0
2789; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v0, v11
2790; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v11
2791; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2792; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v0, v11
2793; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v11
2794; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2795; CGP-NEXT:    v_mov_b32_e32 v1, 0
2796; CGP-NEXT:  .LBB8_4:
2797; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
2798; CGP-NEXT:    v_or_b32_e32 v3, v7, v10
2799; CGP-NEXT:    v_mov_b32_e32 v2, 0
2800; CGP-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
2801; CGP-NEXT:    ; implicit-def: $vgpr2_vgpr3
2802; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
2803; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
2804; CGP-NEXT:    s_cbranch_execnz .LBB8_7
2805; CGP-NEXT:  ; %bb.5: ; %Flow
2806; CGP-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
2807; CGP-NEXT:    s_cbranch_execnz .LBB8_8
2808; CGP-NEXT:  .LBB8_6:
2809; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
2810; CGP-NEXT:    s_setpc_b64 s[30:31]
2811; CGP-NEXT:  .LBB8_7:
2812; CGP-NEXT:    v_ashrrev_i32_e32 v3, 31, v10
2813; CGP-NEXT:    v_add_i32_e32 v2, vcc, v9, v3
2814; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v10, v3, vcc
2815; CGP-NEXT:    v_xor_b32_e32 v2, v2, v3
2816; CGP-NEXT:    v_xor_b32_e32 v3, v4, v3
2817; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
2818; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
2819; CGP-NEXT:    v_sub_i32_e32 v12, vcc, 0, v2
2820; CGP-NEXT:    v_subb_u32_e32 v13, vcc, 0, v3, vcc
2821; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v6
2822; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
2823; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
2824; CGP-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
2825; CGP-NEXT:    v_trunc_f32_e32 v6, v6
2826; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
2827; CGP-NEXT:    v_cvt_u32_f32_e32 v11, v4
2828; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
2829; CGP-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v11, 0
2830; CGP-NEXT:    v_mov_b32_e32 v4, v9
2831; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v12, v6, v[4:5]
2832; CGP-NEXT:    v_mul_lo_u32 v4, v6, v8
2833; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v13, v11, v[9:10]
2834; CGP-NEXT:    v_mul_hi_u32 v10, v11, v8
2835; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
2836; CGP-NEXT:    v_mul_lo_u32 v14, v11, v9
2837; CGP-NEXT:    v_mul_lo_u32 v15, v6, v9
2838; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
2839; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2840; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
2841; CGP-NEXT:    v_mul_hi_u32 v10, v11, v9
2842; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
2843; CGP-NEXT:    v_add_i32_e32 v4, vcc, v14, v4
2844; CGP-NEXT:    v_add_i32_e32 v8, vcc, v15, v8
2845; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2846; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
2847; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2848; CGP-NEXT:    v_add_i32_e32 v10, vcc, v14, v10
2849; CGP-NEXT:    v_mul_hi_u32 v9, v6, v9
2850; CGP-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
2851; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
2852; CGP-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
2853; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
2854; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v4
2855; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v6, v8, vcc
2856; CGP-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v11, 0
2857; CGP-NEXT:    v_mov_b32_e32 v4, v9
2858; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v12, v6, v[4:5]
2859; CGP-NEXT:    v_ashrrev_i32_e32 v12, 31, v7
2860; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v12
2861; CGP-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v13, v11, v[9:10]
2862; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v7, v12, vcc
2863; CGP-NEXT:    v_xor_b32_e32 v7, v4, v12
2864; CGP-NEXT:    v_mul_lo_u32 v4, v6, v8
2865; CGP-NEXT:    v_mul_lo_u32 v10, v11, v9
2866; CGP-NEXT:    v_xor_b32_e32 v13, v5, v12
2867; CGP-NEXT:    v_mul_hi_u32 v5, v11, v8
2868; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
2869; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
2870; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2871; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
2872; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
2873; CGP-NEXT:    v_mul_lo_u32 v5, v6, v9
2874; CGP-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
2875; CGP-NEXT:    v_mul_hi_u32 v10, v11, v9
2876; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
2877; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
2878; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
2879; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2880; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
2881; CGP-NEXT:    v_mul_hi_u32 v9, v6, v9
2882; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
2883; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
2884; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
2885; CGP-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
2886; CGP-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
2887; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v6, v5, vcc
2888; CGP-NEXT:    v_mul_lo_u32 v6, v13, v4
2889; CGP-NEXT:    v_mul_lo_u32 v8, v7, v5
2890; CGP-NEXT:    v_mul_hi_u32 v9, v7, v4
2891; CGP-NEXT:    v_mul_hi_u32 v4, v13, v4
2892; CGP-NEXT:    v_mul_hi_u32 v10, v13, v5
2893; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
2894; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
2895; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
2896; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2897; CGP-NEXT:    v_mul_lo_u32 v9, v13, v5
2898; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
2899; CGP-NEXT:    v_mul_hi_u32 v8, v7, v5
2900; CGP-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
2901; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2902; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
2903; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
2904; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
2905; CGP-NEXT:    v_add_i32_e32 v9, vcc, v4, v6
2906; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v9, 0
2907; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2908; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
2909; CGP-NEXT:    v_add_i32_e32 v6, vcc, v10, v6
2910; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v2, v6, v[5:6]
2911; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v7, v4
2912; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v9, v[5:6]
2913; CGP-NEXT:    v_subb_u32_e64 v6, s[4:5], v13, v5, vcc
2914; CGP-NEXT:    v_sub_i32_e64 v5, s[4:5], v13, v5
2915; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v3
2916; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
2917; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v2
2918; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
2919; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v6, v3
2920; CGP-NEXT:    v_subb_u32_e32 v5, vcc, v5, v3, vcc
2921; CGP-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s[4:5]
2922; CGP-NEXT:    v_sub_i32_e32 v8, vcc, v4, v2
2923; CGP-NEXT:    v_subbrev_u32_e64 v9, s[4:5], 0, v5, vcc
2924; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v3
2925; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
2926; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v2
2927; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
2928; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], v9, v3
2929; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v5, v3, vcc
2930; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v8, v2
2931; CGP-NEXT:    v_cndmask_b32_e64 v10, v10, v11, s[4:5]
2932; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
2933; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
2934; CGP-NEXT:    v_cndmask_b32_e32 v2, v8, v2, vcc
2935; CGP-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
2936; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
2937; CGP-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
2938; CGP-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
2939; CGP-NEXT:    v_xor_b32_e32 v2, v2, v12
2940; CGP-NEXT:    v_xor_b32_e32 v3, v3, v12
2941; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v12
2942; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v12, vcc
2943; CGP-NEXT:    ; implicit-def: $vgpr9_vgpr10
2944; CGP-NEXT:    ; implicit-def: $vgpr5
2945; CGP-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
2946; CGP-NEXT:    s_cbranch_execz .LBB8_6
2947; CGP-NEXT:  .LBB8_8:
2948; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v9
2949; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v9
2950; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, v2
2951; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
2952; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
2953; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
2954; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
2955; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
2956; CGP-NEXT:    v_mul_hi_u32 v2, v5, v2
2957; CGP-NEXT:    v_mul_lo_u32 v2, v2, v9
2958; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v5, v2
2959; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v2, v9
2960; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v9
2961; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
2962; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v2, v9
2963; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v9
2964; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
2965; CGP-NEXT:    v_mov_b32_e32 v3, 0
2966; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
2967; CGP-NEXT:    s_setpc_b64 s[30:31]
2968  %shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y
2969  %r = srem <2 x i64> %x, %shl.y
2970  ret <2 x i64> %r
2971}
2972
; v_srem_i64_24bit: 64-bit signed remainder whose operands are both masked
; with 16777215 (0xffffff) first, so only the low 24 bits of %num and %den
; take part in the srem.
;
; Both check prefixes expect a straight-line 32-bit expansion on the masked
; low dwords (no exec-mask branches): convert the denominator to float, take
; a reciprocal, scale by 0x4f7ffffe, refine with one multiply-high step, then
; apply two compare-and-subtract corrections to the remainder.
;
; Visible differences between the two expectations:
;   * GISEL uses v_rcp_iflag_f32 and v_mul_hi_u32, and writes the high result
;     dword v1 as constant 0.
;   * CGP uses v_rcp_f32 and v_mad_u64_u32 (taking the upper half of the
;     product), and derives v1 from the low result with v_ashrrev_i32 31.
;
; The GISEL/CGP lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
2973define i64 @v_srem_i64_24bit(i64 %num, i64 %den) {
2974; GISEL-LABEL: v_srem_i64_24bit:
2975; GISEL:       ; %bb.0:
2976; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2977; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v2
2978; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, v1
2979; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
2980; GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
2981; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
2982; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
2983; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
2984; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v2
2985; GISEL-NEXT:    v_mul_hi_u32 v3, v2, v3
2986; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
2987; GISEL-NEXT:    v_mul_hi_u32 v2, v0, v2
2988; GISEL-NEXT:    v_mul_lo_u32 v2, v2, v1
2989; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
2990; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
2991; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
2992; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2993; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
2994; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
2995; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2996; GISEL-NEXT:    v_mov_b32_e32 v1, 0
2997; GISEL-NEXT:    s_setpc_b64 s[30:31]
2998;
2999; CGP-LABEL: v_srem_i64_24bit:
3000; CGP:       ; %bb.0:
3001; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3002; CGP-NEXT:    v_and_b32_e32 v3, 0xffffff, v2
3003; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v3
3004; CGP-NEXT:    v_and_b32_e32 v5, 0xffffff, v0
3005; CGP-NEXT:    v_rcp_f32_e32 v1, v1
3006; CGP-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
3007; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v1
3008; CGP-NEXT:    v_sub_i32_e32 v1, vcc, 0, v3
3009; CGP-NEXT:    v_mul_lo_u32 v1, v1, v4
3010; CGP-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v4, v1, 0
3011; CGP-NEXT:    v_add_i32_e32 v0, vcc, v4, v2
3012; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v0, 0
3013; CGP-NEXT:    v_mul_lo_u32 v0, v1, v3
3014; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v5, v0
3015; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v0, v3
3016; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
3017; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3018; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v0, v3
3019; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
3020; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3021; CGP-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
3022; CGP-NEXT:    s_setpc_b64 s[30:31]
; Keep only the low 24 bits of each operand, then take the signed remainder
; of the two masked values and return it.
3023  %num.mask = and i64 %num, 16777215
3024  %den.mask = and i64 %den, 16777215
3025  %result = srem i64 %num.mask, %den.mask
3026  ret i64 %result
3027}
3028
3029define <2 x i64> @v_srem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
3030; GISEL-LABEL: v_srem_v2i64_24bit:
3031; GISEL:       ; %bb.0:
3032; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3033; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v4
3034; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, v1
3035; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v4, 0
3036; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, 0, v1
3037; GISEL-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v4
3038; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
3039; GISEL-NEXT:    v_subb_u32_e64 v12, s[4:5], 0, 0, vcc
3040; GISEL-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
3041; GISEL-NEXT:    v_mul_f32_e32 v3, 0x5f7ffffc, v3
3042; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v3
3043; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
3044; GISEL-NEXT:    v_mac_f32_e32 v3, 0xcf800000, v5
3045; GISEL-NEXT:    v_cvt_u32_f32_e32 v10, v3
3046; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
3047; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v10, 0
3048; GISEL-NEXT:    v_mov_b32_e32 v3, v8
3049; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v11, v5, v[3:4]
3050; GISEL-NEXT:    v_mul_lo_u32 v3, v5, v7
3051; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v10, v[8:9]
3052; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v7
3053; GISEL-NEXT:    v_mul_hi_u32 v7, v5, v7
3054; GISEL-NEXT:    v_mul_lo_u32 v13, v10, v8
3055; GISEL-NEXT:    v_mul_lo_u32 v14, v5, v8
3056; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v13
3057; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
3058; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v9
3059; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v8
3060; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
3061; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v13, v3
3062; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v14, v7
3063; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
3064; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
3065; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
3066; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
3067; GISEL-NEXT:    v_mul_hi_u32 v8, v5, v8
3068; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
3069; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
3070; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
3071; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
3072; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v3
3073; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v7, vcc
3074; GISEL-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v11, v10, 0
3075; GISEL-NEXT:    v_mov_b32_e32 v3, v8
3076; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v11, v5, v[3:4]
3077; GISEL-NEXT:    v_mul_lo_u32 v3, v5, v7
3078; GISEL-NEXT:    v_and_b32_e32 v11, 0xffffff, v0
3079; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v10, v[8:9]
3080; GISEL-NEXT:    v_mul_hi_u32 v0, v10, v7
3081; GISEL-NEXT:    v_mul_hi_u32 v7, v5, v7
3082; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v8
3083; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v9
3084; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
3085; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
3086; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
3087; GISEL-NEXT:    v_mul_lo_u32 v3, v5, v8
3088; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
3089; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v8
3090; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
3091; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
3092; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v9
3093; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
3094; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
3095; GISEL-NEXT:    v_mul_hi_u32 v8, v5, v8
3096; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
3097; GISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
3098; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
3099; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v8, v3
3100; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v10, v0
3101; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v3, vcc
3102; GISEL-NEXT:    v_mul_lo_u32 v7, 0, v0
3103; GISEL-NEXT:    v_mul_lo_u32 v8, v11, v5
3104; GISEL-NEXT:    v_and_b32_e32 v3, 0xffffff, v6
3105; GISEL-NEXT:    v_mul_hi_u32 v6, v11, v0
3106; GISEL-NEXT:    v_mul_hi_u32 v0, 0, v0
3107; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
3108; GISEL-NEXT:    v_mul_lo_u32 v8, 0, v5
3109; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
3110; GISEL-NEXT:    v_mul_hi_u32 v7, v11, v5
3111; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
3112; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
3113; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
3114; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
3115; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v0, v6
3116; GISEL-NEXT:    v_mul_hi_u32 v9, 0, v5
3117; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v1, v8, 0
3118; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
3119; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
3120; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v9, v0
3121; GISEL-NEXT:    v_mov_b32_e32 v0, v6
3122; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v1, v7, v[0:1]
3123; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, v3
3124; GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], 0, v8, v[6:7]
3125; GISEL-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v4
3126; GISEL-NEXT:    v_rcp_iflag_f32_e32 v0, v0
3127; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v11, v5
3128; GISEL-NEXT:    v_subb_u32_e64 v8, s[4:5], 0, v6, vcc
3129; GISEL-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
3130; GISEL-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v0
3131; GISEL-NEXT:    v_trunc_f32_e32 v9, v4
3132; GISEL-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v9
3133; GISEL-NEXT:    v_cvt_u32_f32_e32 v10, v0
3134; GISEL-NEXT:    v_sub_i32_e64 v11, s[4:5], 0, v3
3135; GISEL-NEXT:    v_subb_u32_e64 v12, s[4:5], 0, 0, s[4:5]
3136; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v11, v10, 0
3137; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v9
3138; GISEL-NEXT:    v_sub_i32_e64 v13, s[4:5], 0, v6
3139; GISEL-NEXT:    v_mov_b32_e32 v0, v5
3140; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v11, v9, v[0:1]
3141; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v1
3142; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
3143; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v12, v10, v[5:6]
3144; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v8
3145; GISEL-NEXT:    v_cndmask_b32_e64 v14, -1, v0, s[4:5]
3146; GISEL-NEXT:    v_mul_lo_u32 v0, v9, v4
3147; GISEL-NEXT:    v_mul_lo_u32 v6, v10, v5
3148; GISEL-NEXT:    v_mul_hi_u32 v15, v10, v4
3149; GISEL-NEXT:    v_subbrev_u32_e32 v13, vcc, 0, v13, vcc
3150; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
3151; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
3152; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v15
3153; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
3154; GISEL-NEXT:    v_mul_lo_u32 v15, v9, v5
3155; GISEL-NEXT:    v_mul_hi_u32 v4, v9, v4
3156; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
3157; GISEL-NEXT:    v_mul_hi_u32 v6, v10, v5
3158; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v15, v4
3159; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
3160; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
3161; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
3162; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v15, v6
3163; GISEL-NEXT:    v_mul_hi_u32 v5, v9, v5
3164; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
3165; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
3166; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
3167; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
3168; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v0
3169; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v4, vcc
3170; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v11, v10, 0
3171; GISEL-NEXT:    v_sub_i32_e32 v15, vcc, v7, v1
3172; GISEL-NEXT:    v_mov_b32_e32 v0, v5
3173; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v11, v9, v[0:1]
3174; GISEL-NEXT:    v_subbrev_u32_e32 v13, vcc, 0, v13, vcc
3175; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v12, v10, v[5:6]
3176; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v15, v1
3177; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, -1, vcc
3178; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v13
3179; GISEL-NEXT:    v_cndmask_b32_e32 v0, -1, v16, vcc
3180; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v15, v1
3181; GISEL-NEXT:    v_subbrev_u32_e32 v6, vcc, 0, v13, vcc
3182; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v4
3183; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v5
3184; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
3185; GISEL-NEXT:    v_mul_hi_u32 v0, v10, v4
3186; GISEL-NEXT:    v_mul_hi_u32 v4, v9, v4
3187; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
3188; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
3189; GISEL-NEXT:    v_add_i32_e64 v0, s[4:5], v11, v0
3190; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
3191; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v5
3192; GISEL-NEXT:    v_add_i32_e64 v0, s[4:5], v12, v0
3193; GISEL-NEXT:    v_mul_hi_u32 v12, v10, v5
3194; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v11, v4
3195; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
3196; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v12
3197; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
3198; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
3199; GISEL-NEXT:    v_mul_hi_u32 v5, v9, v5
3200; GISEL-NEXT:    v_add_i32_e64 v0, s[4:5], v4, v0
3201; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
3202; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v11, v4
3203; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v5, v4
3204; GISEL-NEXT:    v_add_i32_e64 v0, s[4:5], v10, v0
3205; GISEL-NEXT:    v_addc_u32_e64 v4, s[4:5], v9, v4, s[4:5]
3206; GISEL-NEXT:    v_mul_lo_u32 v5, 0, v0
3207; GISEL-NEXT:    v_mul_lo_u32 v9, v2, v4
3208; GISEL-NEXT:    v_cndmask_b32_e32 v10, v13, v6, vcc
3209; GISEL-NEXT:    v_mul_hi_u32 v6, v2, v0
3210; GISEL-NEXT:    v_cndmask_b32_e32 v1, v15, v1, vcc
3211; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
3212; GISEL-NEXT:    v_mul_lo_u32 v9, 0, v4
3213; GISEL-NEXT:    v_mul_hi_u32 v0, 0, v0
3214; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
3215; GISEL-NEXT:    v_mul_hi_u32 v6, v2, v4
3216; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
3217; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
3218; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
3219; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
3220; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v0, v5
3221; GISEL-NEXT:    v_mul_hi_u32 v11, 0, v4
3222; GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v3, v9, 0
3223; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
3224; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
3225; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v11, v0
3226; GISEL-NEXT:    v_mov_b32_e32 v0, v5
3227; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v6, v[0:1]
3228; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v14
3229; GISEL-NEXT:    v_cndmask_b32_e32 v0, v7, v1, vcc
3230; GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], 0, v9, v[5:6]
3231; GISEL-NEXT:    v_cndmask_b32_e32 v1, v8, v10, vcc
3232; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v4
3233; GISEL-NEXT:    v_subb_u32_e64 v4, s[4:5], 0, v5, vcc
3234; GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], 0, v5
3235; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
3236; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v2, v3
3237; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
3238; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v3
3239; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
3240; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
3241; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v3
3242; GISEL-NEXT:    v_cndmask_b32_e32 v8, -1, v8, vcc
3243; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v7, v3
3244; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
3245; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v4
3246; GISEL-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v5, vcc
3247; GISEL-NEXT:    v_cndmask_b32_e64 v6, -1, v6, s[4:5]
3248; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
3249; GISEL-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc
3250; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
3251; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
3252; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
3253; GISEL-NEXT:    v_cndmask_b32_e32 v3, v4, v5, vcc
3254; GISEL-NEXT:    s_setpc_b64 s[30:31]
3255;
3256; CGP-LABEL: v_srem_v2i64_24bit:
3257; CGP:       ; %bb.0:
3258; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3259; CGP-NEXT:    v_and_b32_e32 v3, 0xffffff, v4
3260; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v3
3261; CGP-NEXT:    v_and_b32_e32 v4, 0xffffff, v6
3262; CGP-NEXT:    v_sub_i32_e32 v6, vcc, 0, v3
3263; CGP-NEXT:    v_rcp_f32_e32 v1, v1
3264; CGP-NEXT:    v_and_b32_e32 v8, 0xffffff, v0
3265; CGP-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
3266; CGP-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
3267; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v1
3268; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v4
3269; CGP-NEXT:    v_mul_lo_u32 v6, v6, v5
3270; CGP-NEXT:    v_rcp_f32_e32 v7, v1
3271; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v6, 0
3272; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v7
3273; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v0
3274; CGP-NEXT:    v_add_i32_e32 v0, vcc, v5, v1
3275; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v8, v0, 0
3276; CGP-NEXT:    v_sub_i32_e32 v0, vcc, 0, v4
3277; CGP-NEXT:    v_mul_lo_u32 v0, v0, v6
3278; CGP-NEXT:    v_mul_lo_u32 v5, v1, v3
3279; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v0, 0
3280; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v8, v5
3281; CGP-NEXT:    v_add_i32_e32 v0, vcc, v6, v1
3282; CGP-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v2, v0, 0
3283; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v5, v3
3284; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v3
3285; CGP-NEXT:    v_mul_lo_u32 v6, v1, v4
3286; CGP-NEXT:    v_cndmask_b32_e32 v0, v5, v7, vcc
3287; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v0, v3
3288; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
3289; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
3290; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v6
3291; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v2, v4
3292; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
3293; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
3294; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v2, v4
3295; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
3296; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
3297; CGP-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
3298; CGP-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
3299; CGP-NEXT:    s_setpc_b64 s[30:31]
3300  %num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215>
3301  %den.mask = and <2 x i64> %den, <i64 16777215, i64 16777215>
3302  %result = srem <2 x i64> %num.mask, %den.mask
3303  ret <2 x i64> %result
3304}
3305