xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll (revision 6206f5444fc0732e6495703c75a67f1f90f5b418)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,CGP %s
4
5; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
6
; i64 urem with both operands passed without inreg. Every assertion in this
; function uses the check prefix shared by both RUN lines, so the same output
; is expected with the AMDGPUCodeGenPrepare IDIV expansion enabled or disabled.
; The expected code ORs v1 and v3 (presumably the high halves of %num and
; %den -- confirm against the calling convention) and branches on the result:
; lanes where it is non-zero run the long 64-bit sequence at .LBB0_3, the
; remaining lanes run the shorter sequence at .LBB0_4, which only reads the
; low halves (v4, v2) and writes 0 to v1.
7define i64 @v_urem_i64(i64 %num, i64 %den) {
8; CHECK-LABEL: v_urem_i64:
9; CHECK:       ; %bb.0:
10; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; CHECK-NEXT:    v_mov_b32_e32 v4, v0
12; CHECK-NEXT:    v_mov_b32_e32 v5, v1
13; CHECK-NEXT:    v_or_b32_e32 v1, v5, v3
14; CHECK-NEXT:    v_mov_b32_e32 v0, 0
15; CHECK-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
16; CHECK-NEXT:    v_cvt_f32_u32_e32 v6, v2
17; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
18; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
19; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
20; CHECK-NEXT:    s_cbranch_execnz .LBB0_3
21; CHECK-NEXT:  ; %bb.1: ; %Flow
22; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
23; CHECK-NEXT:    s_cbranch_execnz .LBB0_4
24; CHECK-NEXT:  .LBB0_2:
25; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
26; CHECK-NEXT:    s_setpc_b64 s[30:31]
27; CHECK-NEXT:  .LBB0_3:
28; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, v3
29; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, 0, v2
30; CHECK-NEXT:    v_subb_u32_e32 v7, vcc, 0, v3, vcc
31; CHECK-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v0
32; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v6
33; CHECK-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
34; CHECK-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v0
35; CHECK-NEXT:    v_trunc_f32_e32 v6, v6
36; CHECK-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v6
37; CHECK-NEXT:    v_cvt_u32_f32_e32 v6, v6
38; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
39; CHECK-NEXT:    v_mul_lo_u32 v8, v1, v6
40; CHECK-NEXT:    v_mul_lo_u32 v9, v1, v0
41; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v0
42; CHECK-NEXT:    v_mul_hi_u32 v11, v1, v0
43; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
44; CHECK-NEXT:    v_mul_lo_u32 v10, v6, v9
45; CHECK-NEXT:    v_mul_hi_u32 v12, v0, v9
46; CHECK-NEXT:    v_mul_hi_u32 v9, v6, v9
47; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
48; CHECK-NEXT:    v_mul_lo_u32 v11, v0, v8
49; CHECK-NEXT:    v_mul_lo_u32 v13, v6, v8
50; CHECK-NEXT:    v_mul_hi_u32 v14, v0, v8
51; CHECK-NEXT:    v_mul_hi_u32 v8, v6, v8
52; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
53; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
54; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
55; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
56; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
57; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
58; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
59; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
60; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
61; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v13, v12
62; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
63; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
64; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
65; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
66; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
67; CHECK-NEXT:    v_addc_u32_e32 v6, vcc, v6, v8, vcc
68; CHECK-NEXT:    v_mul_lo_u32 v8, v1, v0
69; CHECK-NEXT:    v_mul_lo_u32 v7, v7, v0
70; CHECK-NEXT:    v_mul_hi_u32 v9, v1, v0
71; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v6
72; CHECK-NEXT:    v_mul_lo_u32 v10, v6, v8
73; CHECK-NEXT:    v_mul_hi_u32 v11, v0, v8
74; CHECK-NEXT:    v_mul_hi_u32 v8, v6, v8
75; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v7, v1
76; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v9
77; CHECK-NEXT:    v_mul_lo_u32 v7, v0, v1
78; CHECK-NEXT:    v_mul_lo_u32 v9, v6, v1
79; CHECK-NEXT:    v_mul_hi_u32 v12, v0, v1
80; CHECK-NEXT:    v_mul_hi_u32 v1, v6, v1
81; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
82; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
83; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
84; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
85; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v11
86; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
87; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
88; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
89; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
90; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
91; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
92; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
93; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
94; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
95; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
96; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v6, v1, vcc
97; CHECK-NEXT:    v_mul_lo_u32 v6, v5, v0
98; CHECK-NEXT:    v_mul_hi_u32 v7, v4, v0
99; CHECK-NEXT:    v_mul_hi_u32 v0, v5, v0
100; CHECK-NEXT:    v_mul_lo_u32 v8, v4, v1
101; CHECK-NEXT:    v_mul_lo_u32 v9, v5, v1
102; CHECK-NEXT:    v_mul_hi_u32 v10, v4, v1
103; CHECK-NEXT:    v_mul_hi_u32 v1, v5, v1
104; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
105; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
106; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
107; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
108; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
109; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
110; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
111; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
112; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
113; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
114; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
115; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
116; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
117; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v0
118; CHECK-NEXT:    v_mul_lo_u32 v8, v3, v0
119; CHECK-NEXT:    v_mul_hi_u32 v0, v2, v0
120; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
121; CHECK-NEXT:    v_mul_lo_u32 v1, v2, v1
122; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
123; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
124; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v4, v7
125; CHECK-NEXT:    v_subb_u32_e64 v4, s[4:5], v5, v0, vcc
126; CHECK-NEXT:    v_sub_i32_e64 v0, s[4:5], v5, v0
127; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v2
128; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
129; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v3
130; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
131; CHECK-NEXT:    v_subb_u32_e32 v0, vcc, v0, v3, vcc
132; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v3
133; CHECK-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
134; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, v1, v2
135; CHECK-NEXT:    v_subbrev_u32_e64 v7, s[4:5], 0, v0, vcc
136; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v2
137; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
138; CHECK-NEXT:    v_subb_u32_e32 v0, vcc, v0, v3, vcc
139; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v3
140; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
141; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v6, v2
142; CHECK-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
143; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v7, v3
144; CHECK-NEXT:    v_cndmask_b32_e32 v3, v9, v8, vcc
145; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
146; CHECK-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
147; CHECK-NEXT:    v_cndmask_b32_e32 v3, v7, v0, vcc
148; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
149; CHECK-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
150; CHECK-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
151; CHECK-NEXT:    ; implicit-def: $vgpr6
152; CHECK-NEXT:    ; implicit-def: $vgpr2
153; CHECK-NEXT:    ; implicit-def: $vgpr4
154; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
155; CHECK-NEXT:    s_cbranch_execz .LBB0_2
156; CHECK-NEXT:  .LBB0_4:
157; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v6
158; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, 0, v2
159; CHECK-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
160; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
161; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v0
162; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
163; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
164; CHECK-NEXT:    v_mul_hi_u32 v0, v4, v0
165; CHECK-NEXT:    v_mul_lo_u32 v0, v0, v2
166; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v4, v0
167; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v0, v2
168; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
169; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
170; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v0, v2
171; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
172; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
173; CHECK-NEXT:    v_mov_b32_e32 v1, 0
174; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
175; CHECK-NEXT:    s_setpc_b64 s[30:31]
176  %result = urem i64 %num, %den
177  ret i64 %result
178}
179
180; FIXME: This is a workaround for not handling the uniform VGPR case.
; s_urem_i64 below calls this intrinsic on each 32-bit half of its urem result.
181declare i32 @llvm.amdgcn.readfirstlane(i32)
182
; i64 urem in an amdgpu_ps function with both operands marked inreg. The IR
; splits the result into two i32 halves, passes each through
; llvm.amdgcn.readfirstlane, and returns an i64 rebuilt from the readfirstlane
; values. All assertions use the check prefix shared by both RUN lines.
; The expected code branches on a scalar test of s[0:1] | s[2:3] masked with
; 0xffffffff00000000 (s6 = 0, s7 = -1): block %bb.1 is the long sequence and
; block %bb.4 the short sequence that only uses s0 and s2.
183define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
184; CHECK-LABEL: s_urem_i64:
185; CHECK:       ; %bb.0:
186; CHECK-NEXT:    s_or_b64 s[4:5], s[0:1], s[2:3]
187; CHECK-NEXT:    s_mov_b32 s6, 0
188; CHECK-NEXT:    s_mov_b32 s7, -1
189; CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
190; CHECK-NEXT:    v_cmp_ne_u64_e64 vcc, s[4:5], 0
191; CHECK-NEXT:    s_mov_b32 s6, 1
192; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s2
193; CHECK-NEXT:    s_cbranch_vccz .LBB1_2
194; CHECK-NEXT:  ; %bb.1:
195; CHECK-NEXT:    v_mov_b32_e32 v0, s3
196; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, s3
197; CHECK-NEXT:    s_sub_u32 s4, 0, s2
198; CHECK-NEXT:    s_mov_b32 s6, 0
199; CHECK-NEXT:    v_mov_b32_e32 v3, s1
200; CHECK-NEXT:    v_madmk_f32 v1, v1, 0x4f800000, v2
201; CHECK-NEXT:    s_subb_u32 s5, 0, s3
202; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, v1
203; CHECK-NEXT:    v_mul_f32_e32 v1, 0x5f7ffffc, v1
204; CHECK-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v1
205; CHECK-NEXT:    v_trunc_f32_e32 v4, v4
206; CHECK-NEXT:    v_mac_f32_e32 v1, 0xcf800000, v4
207; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
208; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
209; CHECK-NEXT:    v_mul_lo_u32 v5, s4, v4
210; CHECK-NEXT:    v_mul_lo_u32 v6, s4, v1
211; CHECK-NEXT:    v_mul_lo_u32 v7, s5, v1
212; CHECK-NEXT:    v_mul_hi_u32 v8, s4, v1
213; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
214; CHECK-NEXT:    v_mul_lo_u32 v7, v4, v6
215; CHECK-NEXT:    v_mul_hi_u32 v9, v1, v6
216; CHECK-NEXT:    v_mul_hi_u32 v6, v4, v6
217; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
218; CHECK-NEXT:    v_mul_lo_u32 v8, v1, v5
219; CHECK-NEXT:    v_mul_lo_u32 v10, v4, v5
220; CHECK-NEXT:    v_mul_hi_u32 v11, v1, v5
221; CHECK-NEXT:    v_mul_hi_u32 v5, v4, v5
222; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
223; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
224; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v10, v6
225; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
226; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
227; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
228; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
229; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
230; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
231; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v9
232; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
233; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
234; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
235; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
236; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
237; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v4, v5, vcc
238; CHECK-NEXT:    v_mul_lo_u32 v5, s4, v1
239; CHECK-NEXT:    v_mul_lo_u32 v6, s5, v1
240; CHECK-NEXT:    v_mul_hi_u32 v7, s4, v1
241; CHECK-NEXT:    v_mul_lo_u32 v8, s4, v4
242; CHECK-NEXT:    v_mul_lo_u32 v9, v4, v5
243; CHECK-NEXT:    v_mul_hi_u32 v10, v1, v5
244; CHECK-NEXT:    v_mul_hi_u32 v5, v4, v5
245; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
246; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
247; CHECK-NEXT:    v_mul_lo_u32 v7, v1, v6
248; CHECK-NEXT:    v_mul_lo_u32 v8, v4, v6
249; CHECK-NEXT:    v_mul_hi_u32 v11, v1, v6
250; CHECK-NEXT:    v_mul_hi_u32 v6, v4, v6
251; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
252; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
253; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
254; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
255; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
256; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
257; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v11
258; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
259; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
260; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
261; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
262; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
263; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
264; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
265; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
266; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v4, v6, vcc
267; CHECK-NEXT:    v_mul_lo_u32 v5, s1, v1
268; CHECK-NEXT:    v_mul_hi_u32 v6, s0, v1
269; CHECK-NEXT:    v_mul_hi_u32 v1, s1, v1
270; CHECK-NEXT:    v_mul_lo_u32 v7, s0, v4
271; CHECK-NEXT:    v_mul_lo_u32 v8, s1, v4
272; CHECK-NEXT:    v_mul_hi_u32 v9, s0, v4
273; CHECK-NEXT:    v_mul_hi_u32 v4, s1, v4
274; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
275; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
276; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
277; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
278; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
279; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
280; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v9
281; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
282; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
283; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
284; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
285; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
286; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
287; CHECK-NEXT:    v_mul_lo_u32 v6, s2, v1
288; CHECK-NEXT:    v_mul_lo_u32 v7, s3, v1
289; CHECK-NEXT:    v_mul_hi_u32 v1, s2, v1
290; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
291; CHECK-NEXT:    v_mul_lo_u32 v4, s2, v4
292; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
293; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v4, v1
294; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, s0, v6
295; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v1, vcc
296; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], s1, v1
297; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s2, v4
298; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
299; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s3, v3
300; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
301; CHECK-NEXT:    v_subb_u32_e32 v0, vcc, v1, v0, vcc
302; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v3
303; CHECK-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc
304; CHECK-NEXT:    v_subrev_i32_e32 v3, vcc, s2, v4
305; CHECK-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
306; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v3
307; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
308; CHECK-NEXT:    v_subrev_i32_e32 v6, vcc, s2, v3
309; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s3, v0
310; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
311; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v0
312; CHECK-NEXT:    v_cndmask_b32_e32 v0, v7, v5, vcc
313; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
314; CHECK-NEXT:    v_cndmask_b32_e32 v0, v3, v6, vcc
315; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
316; CHECK-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
317; CHECK-NEXT:    s_branch .LBB1_3
318; CHECK-NEXT:  .LBB1_2:
319; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
320; CHECK-NEXT:  .LBB1_3: ; %Flow
321; CHECK-NEXT:    s_xor_b32 s1, s6, 1
322; CHECK-NEXT:    s_and_b32 s1, s1, 1
323; CHECK-NEXT:    s_cmp_lg_u32 s1, 0
324; CHECK-NEXT:    s_cbranch_scc1 .LBB1_5
325; CHECK-NEXT:  ; %bb.4:
326; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v2
327; CHECK-NEXT:    s_sub_i32 s1, 0, s2
328; CHECK-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
329; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
330; CHECK-NEXT:    v_mul_lo_u32 v1, s1, v0
331; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
332; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
333; CHECK-NEXT:    v_mul_hi_u32 v0, s0, v0
334; CHECK-NEXT:    v_mul_lo_u32 v0, v0, s2
335; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
336; CHECK-NEXT:    v_subrev_i32_e32 v1, vcc, s2, v0
337; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v0
338; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
339; CHECK-NEXT:    v_subrev_i32_e32 v1, vcc, s2, v0
340; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v0
341; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
342; CHECK-NEXT:  .LBB1_5:
343; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
344; CHECK-NEXT:    s_mov_b32 s1, s0
345; CHECK-NEXT:    ; return to shader part epilog
346  %result = urem i64 %num, %den
347  %cast = bitcast i64 %result to <2 x i32>
348  %elt.0 = extractelement <2 x i32> %cast, i32 0
349  %elt.1 = extractelement <2 x i32> %cast, i32 1
350  %res.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.0)
351  %res.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.1)
352  %ins.0 = insertelement <2 x i32> undef, i32 %res.0, i32 0
  ; NOTE(review): element 1 is filled with %res.0, not %res.1, so %res.1 is
  ; unused and the expected code above copies s0 into s1 instead of producing
  ; a separate high half. This looks like a typo for %res.1 -- confirm with the
  ; test owner; changing it requires regenerating the assertions with
  ; utils/update_llc_test_checks.py.
353  %ins.1 = insertelement <2 x i32> %ins.0, i32 %res.0, i32 1
354  %cast.back = bitcast <2 x i32> %ins.1 to i64
355  ret i64 %cast.back
356}
357
358define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
359; GISEL-LABEL: v_urem_v2i64:
360; GISEL:       ; %bb.0:
361; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
362; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v4
363; GISEL-NEXT:    v_cvt_f32_u32_e32 v9, v5
364; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v9
365; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
366; GISEL-NEXT:    v_mul_f32_e32 v8, 0x5f7ffffc, v8
367; GISEL-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v8
368; GISEL-NEXT:    v_trunc_f32_e32 v9, v9
369; GISEL-NEXT:    v_mac_f32_e32 v8, 0xcf800000, v9
370; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
371; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v9
372; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, 0, v4
373; GISEL-NEXT:    v_subb_u32_e32 v11, vcc, 0, v5, vcc
374; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
375; GISEL-NEXT:    v_mul_lo_u32 v13, v11, v8
376; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v9
377; GISEL-NEXT:    v_mul_hi_u32 v15, v10, v8
378; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
379; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
380; GISEL-NEXT:    v_mul_lo_u32 v14, v9, v12
381; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v13
382; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v12
383; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
384; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
385; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
386; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
387; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
388; GISEL-NEXT:    v_mul_lo_u32 v15, v9, v13
389; GISEL-NEXT:    v_mul_hi_u32 v12, v9, v12
390; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v13
391; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
392; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
393; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
394; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
395; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
396; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
397; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
398; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
399; GISEL-NEXT:    v_mul_hi_u32 v13, v9, v13
400; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
401; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
402; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v13, vcc
403; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
404; GISEL-NEXT:    v_mul_lo_u32 v11, v11, v8
405; GISEL-NEXT:    v_mul_lo_u32 v13, v10, v9
406; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v8
407; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
408; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
409; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v12
410; GISEL-NEXT:    v_mul_lo_u32 v13, v8, v10
411; GISEL-NEXT:    v_mul_hi_u32 v14, v8, v12
412; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
413; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
414; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v14
415; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
416; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
417; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v10
418; GISEL-NEXT:    v_mul_hi_u32 v12, v9, v12
419; GISEL-NEXT:    v_mul_hi_u32 v14, v8, v10
420; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
421; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
422; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
423; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
424; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
425; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
426; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
427; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
428; GISEL-NEXT:    v_mul_hi_u32 v10, v9, v10
429; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
430; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
431; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v10, vcc
432; GISEL-NEXT:    v_mul_lo_u32 v10, v1, v8
433; GISEL-NEXT:    v_mul_lo_u32 v11, v0, v9
434; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v8
435; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
436; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
437; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
438; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
439; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
440; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v9
441; GISEL-NEXT:    v_mul_hi_u32 v8, v1, v8
442; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v9
443; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
444; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
445; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
446; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
447; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
448; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
449; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
450; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
451; GISEL-NEXT:    v_mul_hi_u32 v9, v1, v9
452; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
453; GISEL-NEXT:    v_mul_lo_u32 v10, v4, v8
454; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v8
455; GISEL-NEXT:    v_mul_lo_u32 v9, v4, v9
456; GISEL-NEXT:    v_mul_hi_u32 v8, v4, v8
457; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
458; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
459; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
460; GISEL-NEXT:    v_subb_u32_e64 v9, s[4:5], v1, v8, vcc
461; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v8
462; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v5
463; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
464; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v4
465; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
466; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v9, v5
467; GISEL-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s[4:5]
468; GISEL-NEXT:    v_sub_i32_e64 v10, s[4:5], v0, v4
469; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
470; GISEL-NEXT:    v_subbrev_u32_e64 v11, vcc, 0, v1, s[4:5]
471; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v11, v5
472; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
473; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v4
474; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
475; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v11, v5
476; GISEL-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
477; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v10, v4
478; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], v1, v5, s[4:5]
479; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
480; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
481; GISEL-NEXT:    v_cndmask_b32_e32 v4, v10, v4, vcc
482; GISEL-NEXT:    v_cndmask_b32_e32 v1, v11, v1, vcc
483; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
484; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
485; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
486; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v6
487; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v7
488; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
489; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
490; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
491; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
492; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
493; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
494; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
495; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
496; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, 0, v6
497; GISEL-NEXT:    v_subb_u32_e32 v9, vcc, 0, v7, vcc
498; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v4
499; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v4
500; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v5
501; GISEL-NEXT:    v_mul_hi_u32 v13, v8, v4
502; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
503; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
504; GISEL-NEXT:    v_mul_lo_u32 v12, v5, v10
505; GISEL-NEXT:    v_mul_lo_u32 v13, v4, v11
506; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v10
507; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
508; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
509; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
510; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
511; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
512; GISEL-NEXT:    v_mul_lo_u32 v13, v5, v11
513; GISEL-NEXT:    v_mul_hi_u32 v10, v5, v10
514; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v11
515; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
516; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
517; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
518; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
519; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
520; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
521; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
522; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
523; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v11
524; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
525; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
526; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v11, vcc
527; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v4
528; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v4
529; GISEL-NEXT:    v_mul_lo_u32 v11, v8, v5
530; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v4
531; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
532; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
533; GISEL-NEXT:    v_mul_lo_u32 v9, v5, v10
534; GISEL-NEXT:    v_mul_lo_u32 v11, v4, v8
535; GISEL-NEXT:    v_mul_hi_u32 v12, v4, v10
536; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
537; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
538; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
539; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
540; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
541; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v8
542; GISEL-NEXT:    v_mul_hi_u32 v10, v5, v10
543; GISEL-NEXT:    v_mul_hi_u32 v12, v4, v8
544; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
545; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
546; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
547; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
548; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
549; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
550; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
551; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
552; GISEL-NEXT:    v_mul_hi_u32 v8, v5, v8
553; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
554; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
555; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
556; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
557; GISEL-NEXT:    v_mul_lo_u32 v9, v2, v5
558; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
559; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
560; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
561; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
562; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
563; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
564; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v5
565; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
566; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v5
567; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
568; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
569; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
570; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
571; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
572; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
573; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
574; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
575; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
576; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
577; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v4
578; GISEL-NEXT:    v_mul_lo_u32 v9, v7, v4
579; GISEL-NEXT:    v_mul_lo_u32 v5, v6, v5
580; GISEL-NEXT:    v_mul_hi_u32 v4, v6, v4
581; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
582; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
583; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
584; GISEL-NEXT:    v_subb_u32_e64 v5, s[4:5], v3, v4, vcc
585; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
586; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v7
587; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
588; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v6
589; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
590; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v5, v7
591; GISEL-NEXT:    v_cndmask_b32_e64 v4, v4, v8, s[4:5]
592; GISEL-NEXT:    v_sub_i32_e64 v8, s[4:5], v2, v6
593; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
594; GISEL-NEXT:    v_subbrev_u32_e64 v9, vcc, 0, v3, s[4:5]
595; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v7
596; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
597; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v6
598; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
599; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v7
600; GISEL-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
601; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v8, v6
602; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v7, s[4:5]
603; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
604; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
605; GISEL-NEXT:    v_cndmask_b32_e32 v6, v8, v6, vcc
606; GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
607; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
608; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
609; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
610; GISEL-NEXT:    s_setpc_b64 s[30:31]
611;
612; CGP-LABEL: v_urem_v2i64:
613; CGP:       ; %bb.0:
614; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
615; CGP-NEXT:    v_mov_b32_e32 v10, v0
616; CGP-NEXT:    v_mov_b32_e32 v11, v1
617; CGP-NEXT:    v_mov_b32_e32 v8, v2
618; CGP-NEXT:    v_mov_b32_e32 v9, v3
619; CGP-NEXT:    v_or_b32_e32 v1, v11, v5
620; CGP-NEXT:    v_mov_b32_e32 v0, 0
621; CGP-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
622; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v4
623; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
624; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
625; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
626; CGP-NEXT:    s_cbranch_execz .LBB2_2
627; CGP-NEXT:  ; %bb.1:
628; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v5
629; CGP-NEXT:    v_sub_i32_e32 v1, vcc, 0, v4
630; CGP-NEXT:    v_subb_u32_e32 v3, vcc, 0, v5, vcc
631; CGP-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v0
632; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v2
633; CGP-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
634; CGP-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v0
635; CGP-NEXT:    v_trunc_f32_e32 v2, v2
636; CGP-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v2
637; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
638; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
639; CGP-NEXT:    v_mul_lo_u32 v12, v1, v2
640; CGP-NEXT:    v_mul_lo_u32 v13, v1, v0
641; CGP-NEXT:    v_mul_lo_u32 v14, v3, v0
642; CGP-NEXT:    v_mul_hi_u32 v15, v1, v0
643; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
644; CGP-NEXT:    v_mul_lo_u32 v14, v2, v13
645; CGP-NEXT:    v_mul_hi_u32 v16, v0, v13
646; CGP-NEXT:    v_mul_hi_u32 v13, v2, v13
647; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
648; CGP-NEXT:    v_mul_lo_u32 v15, v0, v12
649; CGP-NEXT:    v_mul_lo_u32 v17, v2, v12
650; CGP-NEXT:    v_mul_hi_u32 v18, v0, v12
651; CGP-NEXT:    v_mul_hi_u32 v12, v2, v12
652; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
653; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
654; CGP-NEXT:    v_add_i32_e32 v13, vcc, v17, v13
655; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
656; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
657; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
658; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v18
659; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
660; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
661; CGP-NEXT:    v_add_i32_e32 v15, vcc, v17, v16
662; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
663; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
664; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
665; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
666; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
667; CGP-NEXT:    v_addc_u32_e32 v2, vcc, v2, v12, vcc
668; CGP-NEXT:    v_mul_lo_u32 v12, v1, v0
669; CGP-NEXT:    v_mul_lo_u32 v3, v3, v0
670; CGP-NEXT:    v_mul_hi_u32 v13, v1, v0
671; CGP-NEXT:    v_mul_lo_u32 v1, v1, v2
672; CGP-NEXT:    v_mul_lo_u32 v14, v2, v12
673; CGP-NEXT:    v_mul_hi_u32 v15, v0, v12
674; CGP-NEXT:    v_mul_hi_u32 v12, v2, v12
675; CGP-NEXT:    v_add_i32_e32 v1, vcc, v3, v1
676; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
677; CGP-NEXT:    v_mul_lo_u32 v3, v0, v1
678; CGP-NEXT:    v_mul_lo_u32 v13, v2, v1
679; CGP-NEXT:    v_mul_hi_u32 v16, v0, v1
680; CGP-NEXT:    v_mul_hi_u32 v1, v2, v1
681; CGP-NEXT:    v_add_i32_e32 v3, vcc, v14, v3
682; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
683; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
684; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
685; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v15
686; CGP-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
687; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
688; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
689; CGP-NEXT:    v_add_i32_e32 v3, vcc, v14, v3
690; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
691; CGP-NEXT:    v_add_i32_e32 v3, vcc, v12, v3
692; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
693; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
694; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v12
695; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
696; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v2, v1, vcc
697; CGP-NEXT:    v_mul_lo_u32 v2, v11, v0
698; CGP-NEXT:    v_mul_hi_u32 v3, v10, v0
699; CGP-NEXT:    v_mul_hi_u32 v0, v11, v0
700; CGP-NEXT:    v_mul_lo_u32 v12, v10, v1
701; CGP-NEXT:    v_mul_lo_u32 v13, v11, v1
702; CGP-NEXT:    v_mul_hi_u32 v14, v10, v1
703; CGP-NEXT:    v_mul_hi_u32 v1, v11, v1
704; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v12
705; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
706; CGP-NEXT:    v_add_i32_e32 v0, vcc, v13, v0
707; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
708; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
709; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
710; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
711; CGP-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
712; CGP-NEXT:    v_add_i32_e32 v2, vcc, v12, v2
713; CGP-NEXT:    v_add_i32_e32 v3, vcc, v13, v3
714; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
715; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
716; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
717; CGP-NEXT:    v_mul_lo_u32 v3, v4, v0
718; CGP-NEXT:    v_mul_lo_u32 v12, v5, v0
719; CGP-NEXT:    v_mul_hi_u32 v0, v4, v0
720; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
721; CGP-NEXT:    v_mul_lo_u32 v1, v4, v1
722; CGP-NEXT:    v_add_i32_e32 v1, vcc, v12, v1
723; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
724; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v10, v3
725; CGP-NEXT:    v_subb_u32_e64 v2, s[4:5], v11, v0, vcc
726; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v11, v0
727; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v4
728; CGP-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s[4:5]
729; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v5
730; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
731; CGP-NEXT:    v_subb_u32_e32 v0, vcc, v0, v5, vcc
732; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v5
733; CGP-NEXT:    v_cndmask_b32_e32 v3, v10, v3, vcc
734; CGP-NEXT:    v_sub_i32_e32 v10, vcc, v1, v4
735; CGP-NEXT:    v_subbrev_u32_e64 v11, s[4:5], 0, v0, vcc
736; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v4
737; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[4:5]
738; CGP-NEXT:    v_subb_u32_e32 v0, vcc, v0, v5, vcc
739; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v11, v5
740; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
741; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v10, v4
742; CGP-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
743; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v11, v5
744; CGP-NEXT:    v_cndmask_b32_e32 v5, v13, v12, vcc
745; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
746; CGP-NEXT:    v_cndmask_b32_e32 v4, v10, v4, vcc
747; CGP-NEXT:    v_cndmask_b32_e32 v5, v11, v0, vcc
748; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
749; CGP-NEXT:    v_cndmask_b32_e32 v0, v1, v4, vcc
750; CGP-NEXT:    v_cndmask_b32_e32 v1, v2, v5, vcc
751; CGP-NEXT:    ; implicit-def: $vgpr2
752; CGP-NEXT:    ; implicit-def: $vgpr4
753; CGP-NEXT:    ; implicit-def: $vgpr10
754; CGP-NEXT:  .LBB2_2: ; %Flow1
755; CGP-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
756; CGP-NEXT:    s_cbranch_execz .LBB2_4
757; CGP-NEXT:  ; %bb.3:
758; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v2
759; CGP-NEXT:    v_sub_i32_e32 v1, vcc, 0, v4
760; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
761; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
762; CGP-NEXT:    v_mul_lo_u32 v1, v1, v0
763; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
764; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
765; CGP-NEXT:    v_mul_hi_u32 v0, v10, v0
766; CGP-NEXT:    v_mul_lo_u32 v0, v0, v4
767; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v10, v0
768; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v0, v4
769; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
770; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
771; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v0, v4
772; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
773; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
774; CGP-NEXT:    v_mov_b32_e32 v1, 0
775; CGP-NEXT:  .LBB2_4:
776; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
777; CGP-NEXT:    v_or_b32_e32 v3, v9, v7
778; CGP-NEXT:    v_mov_b32_e32 v2, 0
779; CGP-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
780; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v6
781; CGP-NEXT:    ; implicit-def: $vgpr2_vgpr3
782; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
783; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
784; CGP-NEXT:    s_cbranch_execnz .LBB2_7
785; CGP-NEXT:  ; %bb.5: ; %Flow
786; CGP-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
787; CGP-NEXT:    s_cbranch_execnz .LBB2_8
788; CGP-NEXT:  .LBB2_6:
789; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
790; CGP-NEXT:    s_setpc_b64 s[30:31]
791; CGP-NEXT:  .LBB2_7:
792; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v7
793; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v6
794; CGP-NEXT:    v_subb_u32_e32 v5, vcc, 0, v7, vcc
795; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v2
796; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, v4
797; CGP-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
798; CGP-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v2
799; CGP-NEXT:    v_trunc_f32_e32 v4, v4
800; CGP-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v4
801; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
802; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
803; CGP-NEXT:    v_mul_lo_u32 v10, v3, v4
804; CGP-NEXT:    v_mul_lo_u32 v11, v3, v2
805; CGP-NEXT:    v_mul_lo_u32 v12, v5, v2
806; CGP-NEXT:    v_mul_hi_u32 v13, v3, v2
807; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
808; CGP-NEXT:    v_mul_lo_u32 v12, v4, v11
809; CGP-NEXT:    v_mul_hi_u32 v14, v2, v11
810; CGP-NEXT:    v_mul_hi_u32 v11, v4, v11
811; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
812; CGP-NEXT:    v_mul_lo_u32 v13, v2, v10
813; CGP-NEXT:    v_mul_lo_u32 v15, v4, v10
814; CGP-NEXT:    v_mul_hi_u32 v16, v2, v10
815; CGP-NEXT:    v_mul_hi_u32 v10, v4, v10
816; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
817; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
818; CGP-NEXT:    v_add_i32_e32 v11, vcc, v15, v11
819; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
820; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
821; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
822; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v16
823; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
824; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
825; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v14
826; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
827; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
828; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
829; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
830; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v11
831; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v4, v10, vcc
832; CGP-NEXT:    v_mul_lo_u32 v10, v3, v2
833; CGP-NEXT:    v_mul_lo_u32 v5, v5, v2
834; CGP-NEXT:    v_mul_hi_u32 v11, v3, v2
835; CGP-NEXT:    v_mul_lo_u32 v3, v3, v4
836; CGP-NEXT:    v_mul_lo_u32 v12, v4, v10
837; CGP-NEXT:    v_mul_hi_u32 v13, v2, v10
838; CGP-NEXT:    v_mul_hi_u32 v10, v4, v10
839; CGP-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
840; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v11
841; CGP-NEXT:    v_mul_lo_u32 v5, v2, v3
842; CGP-NEXT:    v_mul_lo_u32 v11, v4, v3
843; CGP-NEXT:    v_mul_hi_u32 v14, v2, v3
844; CGP-NEXT:    v_mul_hi_u32 v3, v4, v3
845; CGP-NEXT:    v_add_i32_e32 v5, vcc, v12, v5
846; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
847; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
848; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
849; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v13
850; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
851; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
852; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
853; CGP-NEXT:    v_add_i32_e32 v5, vcc, v12, v5
854; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
855; CGP-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
856; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
857; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
858; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v10
859; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
860; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v4, v3, vcc
861; CGP-NEXT:    v_mul_lo_u32 v4, v9, v2
862; CGP-NEXT:    v_mul_hi_u32 v5, v8, v2
863; CGP-NEXT:    v_mul_hi_u32 v2, v9, v2
864; CGP-NEXT:    v_mul_lo_u32 v10, v8, v3
865; CGP-NEXT:    v_mul_lo_u32 v11, v9, v3
866; CGP-NEXT:    v_mul_hi_u32 v12, v8, v3
867; CGP-NEXT:    v_mul_hi_u32 v3, v9, v3
868; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
869; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
870; CGP-NEXT:    v_add_i32_e32 v2, vcc, v11, v2
871; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
872; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
873; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
874; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v12
875; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
876; CGP-NEXT:    v_add_i32_e32 v4, vcc, v10, v4
877; CGP-NEXT:    v_add_i32_e32 v5, vcc, v11, v5
878; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
879; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
880; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
881; CGP-NEXT:    v_mul_lo_u32 v5, v6, v2
882; CGP-NEXT:    v_mul_lo_u32 v10, v7, v2
883; CGP-NEXT:    v_mul_hi_u32 v2, v6, v2
884; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
885; CGP-NEXT:    v_mul_lo_u32 v3, v6, v3
886; CGP-NEXT:    v_add_i32_e32 v3, vcc, v10, v3
887; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
888; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v8, v5
889; CGP-NEXT:    v_subb_u32_e64 v4, s[4:5], v9, v2, vcc
890; CGP-NEXT:    v_sub_i32_e64 v2, s[4:5], v9, v2
891; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v6
892; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
893; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v7
894; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
895; CGP-NEXT:    v_subb_u32_e32 v2, vcc, v2, v7, vcc
896; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v7
897; CGP-NEXT:    v_cndmask_b32_e32 v5, v8, v5, vcc
898; CGP-NEXT:    v_sub_i32_e32 v8, vcc, v3, v6
899; CGP-NEXT:    v_subbrev_u32_e64 v9, s[4:5], 0, v2, vcc
900; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v6
901; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
902; CGP-NEXT:    v_subb_u32_e32 v2, vcc, v2, v7, vcc
903; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v7
904; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
905; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v8, v6
906; CGP-NEXT:    v_subbrev_u32_e32 v2, vcc, 0, v2, vcc
907; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v7
908; CGP-NEXT:    v_cndmask_b32_e32 v7, v11, v10, vcc
909; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
910; CGP-NEXT:    v_cndmask_b32_e32 v6, v8, v6, vcc
911; CGP-NEXT:    v_cndmask_b32_e32 v7, v9, v2, vcc
912; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
913; CGP-NEXT:    v_cndmask_b32_e32 v2, v3, v6, vcc
914; CGP-NEXT:    v_cndmask_b32_e32 v3, v4, v7, vcc
915; CGP-NEXT:    ; implicit-def: $vgpr4
916; CGP-NEXT:    ; implicit-def: $vgpr6
917; CGP-NEXT:    ; implicit-def: $vgpr8
918; CGP-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
919; CGP-NEXT:    s_cbranch_execz .LBB2_6
920; CGP-NEXT:  .LBB2_8:
921; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, v4
922; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v6
923; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
924; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
925; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
926; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
927; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
928; CGP-NEXT:    v_mul_hi_u32 v2, v8, v2
929; CGP-NEXT:    v_mul_lo_u32 v2, v2, v6
930; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v8, v2
931; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v2, v6
932; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v6
933; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
934; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v2, v6
935; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v6
936; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
937; CGP-NEXT:    v_mov_b32_e32 v3, 0
938; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
939; CGP-NEXT:    s_setpc_b64 s[30:31]
940  %result = urem <2 x i64> %num, %den
941  ret <2 x i64> %result
942}
943
; Unsigned remainder of a scalar i64 by the power-of-two constant 4096 (0x1000).
; Both RUN configurations share the expectations below: no division expansion
; is emitted, only a mask of the low dword with 0xfff (4096 - 1) and a zeroing
; of the high dword (v1), followed by the return.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand editing.
944define i64 @v_urem_i64_pow2k_denom(i64 %num) {
945; CHECK-LABEL: v_urem_i64_pow2k_denom:
946; CHECK:       ; %bb.0:
947; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
948; CHECK-NEXT:    v_and_b32_e32 v0, 0xfff, v0
949; CHECK-NEXT:    v_mov_b32_e32 v1, 0
950; CHECK-NEXT:    s_setpc_b64 s[30:31]
951  %result = urem i64 %num, 4096
952  ret i64 %result
953}
954
; Vector form of the power-of-two case: urem of <2 x i64> by a splat of 4096.
; Both RUN configurations share the expectations below. Each element is handled
; independently with the same mask-and-zero pattern as the scalar test: the low
; dwords (v0, v2) are masked with 0xfff and the high dwords (v1, v3) are set to 0.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand editing.
955define <2 x i64> @v_urem_v2i64_pow2k_denom(<2 x i64> %num) {
956; CHECK-LABEL: v_urem_v2i64_pow2k_denom:
957; CHECK:       ; %bb.0:
958; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
959; CHECK-NEXT:    v_and_b32_e32 v0, 0xfff, v0
960; CHECK-NEXT:    v_and_b32_e32 v2, 0xfff, v2
961; CHECK-NEXT:    v_mov_b32_e32 v1, 0
962; CHECK-NEXT:    v_mov_b32_e32 v3, 0
963; CHECK-NEXT:    s_setpc_b64 s[30:31]
964  %result = urem <2 x i64> %num, <i64 4096, i64 4096>
965  ret <2 x i64> %result
966}
967
; Unsigned remainder of a scalar i64 by the odd constant 1235195 (0x12d8fb).
; The divisor is not a power of two, so no single mask applies. Both RUN
; configurations share the expectations below, which form one straight-line
; sequence (no exec-mask branches) that, in order:
;   - materializes the divisor 0x12d8fb in v2 and its 32-bit negation
;     0xffed2705 in v5;
;   - seeds an estimate from a float reciprocal (v_cvt_f32_u32, v_rcp_iflag_f32)
;     and converts it back to two 32-bit integer halves (v3, v4);
;   - runs two similar rounds of v_mul_lo_u32 / v_mul_hi_u32 with carry
;     propagation through v_cndmask (presumably refining the reciprocal
;     estimate; the file itself does not state the algorithm);
;   - multiplies the estimate with the dividend halves (v0, v1), multiplies the
;     result by the divisor and subtracts that from the dividend;
;   - finishes with compare / v_cndmask selects that apply up to two further
;     subtractions of the divisor before writing v0 and v1 and returning.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand editing.
968define i64 @v_urem_i64_oddk_denom(i64 %num) {
969; CHECK-LABEL: v_urem_i64_oddk_denom:
970; CHECK:       ; %bb.0:
971; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
972; CHECK-NEXT:    v_mov_b32_e32 v2, 0x12d8fb
973; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, 0x12d8fb
974; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v4, 0
975; CHECK-NEXT:    v_mov_b32_e32 v5, 0xffed2705
976; CHECK-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v4
977; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
978; CHECK-NEXT:    v_mul_f32_e32 v3, 0x5f7ffffc, v3
979; CHECK-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v3
980; CHECK-NEXT:    v_trunc_f32_e32 v4, v4
981; CHECK-NEXT:    v_mac_f32_e32 v3, 0xcf800000, v4
982; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
983; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
984; CHECK-NEXT:    v_mul_lo_u32 v6, v4, v5
985; CHECK-NEXT:    v_mul_lo_u32 v7, v3, v5
986; CHECK-NEXT:    v_mul_hi_u32 v8, v3, v5
987; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, v6, v3
988; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
989; CHECK-NEXT:    v_mul_lo_u32 v8, v4, v7
990; CHECK-NEXT:    v_mul_hi_u32 v9, v3, v7
991; CHECK-NEXT:    v_mul_hi_u32 v7, v4, v7
992; CHECK-NEXT:    v_mul_lo_u32 v10, v3, v6
993; CHECK-NEXT:    v_mul_lo_u32 v11, v4, v6
994; CHECK-NEXT:    v_mul_hi_u32 v12, v3, v6
995; CHECK-NEXT:    v_mul_hi_u32 v6, v4, v6
996; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
997; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
998; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v11, v7
999; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1000; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
1001; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1002; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
1003; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1004; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
1005; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
1006; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
1007; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1008; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
1009; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
1010; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
1011; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v4, v6, vcc
1012; CHECK-NEXT:    v_mul_lo_u32 v6, v3, v5
1013; CHECK-NEXT:    v_mul_hi_u32 v7, v3, v5
1014; CHECK-NEXT:    v_mul_lo_u32 v5, v4, v5
1015; CHECK-NEXT:    v_mul_lo_u32 v8, v4, v6
1016; CHECK-NEXT:    v_mul_hi_u32 v9, v3, v6
1017; CHECK-NEXT:    v_mul_hi_u32 v6, v4, v6
1018; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, v5, v3
1019; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
1020; CHECK-NEXT:    v_mul_lo_u32 v7, v3, v5
1021; CHECK-NEXT:    v_mul_lo_u32 v10, v4, v5
1022; CHECK-NEXT:    v_mul_hi_u32 v11, v3, v5
1023; CHECK-NEXT:    v_mul_hi_u32 v5, v4, v5
1024; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
1025; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1026; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v10, v6
1027; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1028; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
1029; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
1030; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
1031; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1032; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
1033; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v9
1034; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
1035; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
1036; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
1037; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
1038; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
1039; CHECK-NEXT:    v_addc_u32_e32 v4, vcc, v4, v5, vcc
1040; CHECK-NEXT:    v_mul_lo_u32 v5, v1, v3
1041; CHECK-NEXT:    v_mul_hi_u32 v6, v0, v3
1042; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v3
1043; CHECK-NEXT:    v_mul_lo_u32 v7, v0, v4
1044; CHECK-NEXT:    v_mul_lo_u32 v8, v1, v4
1045; CHECK-NEXT:    v_mul_hi_u32 v9, v0, v4
1046; CHECK-NEXT:    v_mul_hi_u32 v4, v1, v4
1047; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
1048; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
1049; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v8, v3
1050; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1051; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
1052; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
1053; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v9
1054; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1055; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
1056; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
1057; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
1058; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
1059; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
1060; CHECK-NEXT:    v_mul_lo_u32 v6, v3, v2
1061; CHECK-NEXT:    v_mul_hi_u32 v3, v3, v2
1062; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
1063; CHECK-NEXT:    v_mul_lo_u32 v4, v4, v2
1064; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
1065; CHECK-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v6
1066; CHECK-NEXT:    v_subb_u32_e64 v4, vcc, v1, v3, s[4:5]
1067; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
1068; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
1069; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
1070; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
1071; CHECK-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v4
1072; CHECK-NEXT:    v_cndmask_b32_e64 v3, -1, v3, s[6:7]
1073; CHECK-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
1074; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v2
1075; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[4:5]
1076; CHECK-NEXT:    s_mov_b64 s[4:5], vcc
1077; CHECK-NEXT:    v_subrev_i32_e32 v6, vcc, 0x12d8fb, v5
1078; CHECK-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
1079; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v1
1080; CHECK-NEXT:    v_cndmask_b32_e64 v2, -1, v2, s[4:5]
1081; CHECK-NEXT:    v_subbrev_u32_e32 v7, vcc, 0, v1, vcc
1082; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
1083; CHECK-NEXT:    v_cndmask_b32_e32 v2, v5, v6, vcc
1084; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
1085; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
1086; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1087; CHECK-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
1088; CHECK-NEXT:    s_setpc_b64 s[30:31]
1089  %result = urem i64 %num, 1235195
1090  ret i64 %result
1091}
1092
1093define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
1094; GISEL-LABEL: v_urem_v2i64_oddk_denom:
1095; GISEL:       ; %bb.0:
1096; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1097; GISEL-NEXT:    v_mov_b32_e32 v4, 0x12d8fb
1098; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, 0x12d8fb
1099; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v7, 0
1100; GISEL-NEXT:    s_mov_b32 s4, 1
1101; GISEL-NEXT:    v_mov_b32_e32 v5, 0xffed2705
1102; GISEL-NEXT:    s_mov_b32 s5, 1
1103; GISEL-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v7
1104; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
1105; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
1106; GISEL-NEXT:    s_subb_u32 s6, 0, 0
1107; GISEL-NEXT:    v_mul_f32_e32 v6, 0x5f7ffffc, v6
1108; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v6
1109; GISEL-NEXT:    s_cmp_lg_u32 s5, 0
1110; GISEL-NEXT:    s_subb_u32 s7, 0, 0
1111; GISEL-NEXT:    v_trunc_f32_e32 v7, v7
1112; GISEL-NEXT:    v_mac_f32_e32 v6, 0xcf800000, v7
1113; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
1114; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
1115; GISEL-NEXT:    v_mul_lo_u32 v8, v7, v5
1116; GISEL-NEXT:    v_mul_lo_u32 v9, v6, v5
1117; GISEL-NEXT:    v_mul_lo_u32 v10, s6, v6
1118; GISEL-NEXT:    v_mul_hi_u32 v11, v6, v5
1119; GISEL-NEXT:    v_mul_lo_u32 v12, s7, v6
1120; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v8
1121; GISEL-NEXT:    v_mul_lo_u32 v13, v7, v9
1122; GISEL-NEXT:    v_mul_hi_u32 v14, v6, v9
1123; GISEL-NEXT:    v_mul_hi_u32 v9, v7, v9
1124; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
1125; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
1126; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
1127; GISEL-NEXT:    v_mul_lo_u32 v11, v6, v10
1128; GISEL-NEXT:    v_mul_lo_u32 v12, v7, v10
1129; GISEL-NEXT:    v_mul_hi_u32 v15, v6, v10
1130; GISEL-NEXT:    v_mul_hi_u32 v10, v7, v10
1131; GISEL-NEXT:    v_mul_lo_u32 v16, v6, v8
1132; GISEL-NEXT:    v_mul_lo_u32 v17, v7, v8
1133; GISEL-NEXT:    v_mul_hi_u32 v18, v6, v8
1134; GISEL-NEXT:    v_mul_hi_u32 v8, v7, v8
1135; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
1136; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
1137; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
1138; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
1139; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
1140; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v9
1141; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
1142; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v17, v9
1143; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[4:5]
1144; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
1145; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
1146; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v15
1147; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
1148; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1149; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v18
1150; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
1151; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v19, v11
1152; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
1153; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v16, v15
1154; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v17, v18
1155; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1156; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1157; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
1158; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1159; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
1160; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v15, v14
1161; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
1162; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v13
1163; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v6, v11
1164; GISEL-NEXT:    v_addc_u32_e32 v10, vcc, v7, v10, vcc
1165; GISEL-NEXT:    v_mul_lo_u32 v12, v11, v5
1166; GISEL-NEXT:    v_mul_lo_u32 v13, s6, v11
1167; GISEL-NEXT:    v_mul_hi_u32 v14, v11, v5
1168; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
1169; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, v7, v8, vcc
1170; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v5
1171; GISEL-NEXT:    v_mul_lo_u32 v9, s7, v6
1172; GISEL-NEXT:    v_mul_hi_u32 v15, v6, v5
1173; GISEL-NEXT:    v_mul_lo_u32 v16, v10, v5
1174; GISEL-NEXT:    v_mul_lo_u32 v17, v10, v12
1175; GISEL-NEXT:    v_mul_hi_u32 v18, v11, v12
1176; GISEL-NEXT:    v_mul_hi_u32 v12, v10, v12
1177; GISEL-NEXT:    v_mul_lo_u32 v5, v7, v5
1178; GISEL-NEXT:    v_mul_lo_u32 v19, v7, v8
1179; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
1180; GISEL-NEXT:    v_mul_hi_u32 v16, v6, v8
1181; GISEL-NEXT:    v_mul_hi_u32 v8, v7, v8
1182; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
1183; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v13, v14
1184; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v15
1185; GISEL-NEXT:    v_mul_lo_u32 v13, v11, v9
1186; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v9
1187; GISEL-NEXT:    v_mul_hi_u32 v15, v11, v9
1188; GISEL-NEXT:    v_mul_hi_u32 v9, v10, v9
1189; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v17, v13
1190; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
1191; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v18
1192; GISEL-NEXT:    v_mul_lo_u32 v13, v6, v5
1193; GISEL-NEXT:    v_mul_lo_u32 v18, v7, v5
1194; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v19, v13
1195; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
1196; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v16
1197; GISEL-NEXT:    v_mul_hi_u32 v13, v6, v5
1198; GISEL-NEXT:    v_mul_hi_u32 v5, v7, v5
1199; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v14, v12
1200; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[6:7]
1201; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v18, v8
1202; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
1203; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
1204; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
1205; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1206; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v18
1207; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
1208; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v13
1209; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1210; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
1211; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
1212; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v16, v13
1213; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v17
1214; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
1215; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v15
1216; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1217; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
1218; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
1219; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
1220; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v13
1221; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
1222; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v10, v9, vcc
1223; GISEL-NEXT:    v_mul_lo_u32 v10, v1, v11
1224; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v11
1225; GISEL-NEXT:    v_mul_hi_u32 v11, v1, v11
1226; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
1227; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v7, v5, vcc
1228; GISEL-NEXT:    v_mul_lo_u32 v7, v3, v6
1229; GISEL-NEXT:    v_mul_hi_u32 v8, v2, v6
1230; GISEL-NEXT:    v_mul_hi_u32 v6, v3, v6
1231; GISEL-NEXT:    v_mul_lo_u32 v13, v0, v9
1232; GISEL-NEXT:    v_mul_lo_u32 v14, v1, v9
1233; GISEL-NEXT:    v_mul_hi_u32 v15, v0, v9
1234; GISEL-NEXT:    v_mul_hi_u32 v9, v1, v9
1235; GISEL-NEXT:    v_mul_lo_u32 v16, v2, v5
1236; GISEL-NEXT:    v_mul_lo_u32 v17, v3, v5
1237; GISEL-NEXT:    v_mul_hi_u32 v18, v2, v5
1238; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
1239; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
1240; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1241; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v14, v11
1242; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1243; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v16
1244; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
1245; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v17, v6
1246; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
1247; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
1248; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1249; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v15
1250; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1251; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
1252; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
1253; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v18
1254; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1255; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
1256; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
1257; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v16, v7
1258; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v17, v8
1259; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
1260; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1261; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
1262; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
1263; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1264; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v4
1265; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v4
1266; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
1267; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v4
1268; GISEL-NEXT:    v_mul_hi_u32 v6, v6, v4
1269; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
1270; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
1271; GISEL-NEXT:    v_mul_lo_u32 v7, v9, v4
1272; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v4
1273; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
1274; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
1275; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v12
1276; GISEL-NEXT:    v_subb_u32_e64 v6, vcc, v1, v7, s[4:5]
1277; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
1278; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
1279; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
1280; GISEL-NEXT:    v_sub_i32_e64 v2, s[6:7], v2, v8
1281; GISEL-NEXT:    v_subb_u32_e64 v8, vcc, v3, v5, s[6:7]
1282; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v3, v5
1283; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
1284; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
1285; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, v2, v4
1286; GISEL-NEXT:    v_cmp_eq_u32_e64 s[8:9], 0, v6
1287; GISEL-NEXT:    v_cndmask_b32_e64 v7, -1, v7, s[8:9]
1288; GISEL-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
1289; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v8
1290; GISEL-NEXT:    v_cndmask_b32_e64 v5, -1, v5, s[4:5]
1291; GISEL-NEXT:    v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[6:7]
1292; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v4
1293; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
1294; GISEL-NEXT:    s_mov_b64 s[4:5], vcc
1295; GISEL-NEXT:    v_subrev_i32_e32 v11, vcc, 0x12d8fb, v9
1296; GISEL-NEXT:    v_sub_i32_e64 v12, s[6:7], v0, v4
1297; GISEL-NEXT:    v_subbrev_u32_e64 v1, s[6:7], 0, v1, s[6:7]
1298; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v12, v4
1299; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[6:7]
1300; GISEL-NEXT:    v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[4:5]
1301; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v1
1302; GISEL-NEXT:    v_cndmask_b32_e64 v13, -1, v13, s[4:5]
1303; GISEL-NEXT:    v_sub_i32_e64 v4, s[4:5], v12, v4
1304; GISEL-NEXT:    v_subbrev_u32_e64 v14, s[4:5], 0, v1, s[4:5]
1305; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
1306; GISEL-NEXT:    v_cndmask_b32_e64 v10, -1, v10, s[4:5]
1307; GISEL-NEXT:    v_subbrev_u32_e32 v15, vcc, 0, v3, vcc
1308; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
1309; GISEL-NEXT:    v_cndmask_b32_e32 v4, v12, v4, vcc
1310; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v10
1311; GISEL-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s[4:5]
1312; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v14, vcc
1313; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
1314; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1315; GISEL-NEXT:    v_cndmask_b32_e64 v3, v3, v15, s[4:5]
1316; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v5
1317; GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, v9, s[4:5]
1318; GISEL-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
1319; GISEL-NEXT:    v_cndmask_b32_e64 v3, v8, v3, s[4:5]
1320; GISEL-NEXT:    s_setpc_b64 s[30:31]
1321;
1322; CGP-LABEL: v_urem_v2i64_oddk_denom:
1323; CGP:       ; %bb.0:
1324; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1325; CGP-NEXT:    v_mov_b32_e32 v4, 0x12d8fb
1326; CGP-NEXT:    v_cvt_f32_u32_e32 v5, 0x12d8fb
1327; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v6, 0
1328; CGP-NEXT:    v_mov_b32_e32 v7, 0xffed2705
1329; CGP-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v6
1330; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v5
1331; CGP-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
1332; CGP-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v5
1333; CGP-NEXT:    v_trunc_f32_e32 v6, v6
1334; CGP-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v6
1335; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
1336; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
1337; CGP-NEXT:    v_mul_lo_u32 v8, v6, v7
1338; CGP-NEXT:    v_mul_lo_u32 v9, v5, v7
1339; CGP-NEXT:    v_mul_hi_u32 v10, v5, v7
1340; CGP-NEXT:    v_sub_i32_e32 v8, vcc, v8, v5
1341; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
1342; CGP-NEXT:    v_mul_lo_u32 v10, v6, v9
1343; CGP-NEXT:    v_mul_hi_u32 v11, v5, v9
1344; CGP-NEXT:    v_mul_hi_u32 v9, v6, v9
1345; CGP-NEXT:    v_mul_lo_u32 v12, v5, v8
1346; CGP-NEXT:    v_mul_lo_u32 v13, v6, v8
1347; CGP-NEXT:    v_mul_hi_u32 v14, v5, v8
1348; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
1349; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
1350; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1351; CGP-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
1352; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1353; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
1354; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1355; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
1356; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1357; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
1358; CGP-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
1359; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
1360; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1361; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
1362; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
1363; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
1364; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v6, v8, vcc
1365; CGP-NEXT:    v_mul_lo_u32 v8, v5, v7
1366; CGP-NEXT:    v_mul_hi_u32 v9, v5, v7
1367; CGP-NEXT:    v_mul_lo_u32 v7, v6, v7
1368; CGP-NEXT:    v_mul_lo_u32 v10, v6, v8
1369; CGP-NEXT:    v_mul_hi_u32 v11, v5, v8
1370; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
1371; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v7, v5
1372; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
1373; CGP-NEXT:    v_mul_lo_u32 v9, v5, v7
1374; CGP-NEXT:    v_mul_lo_u32 v12, v6, v7
1375; CGP-NEXT:    v_mul_hi_u32 v13, v5, v7
1376; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
1377; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
1378; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1379; CGP-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
1380; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1381; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
1382; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1383; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v13
1384; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1385; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
1386; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v11
1387; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
1388; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1389; CGP-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
1390; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
1391; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
1392; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v6, v7, vcc
1393; CGP-NEXT:    v_mul_lo_u32 v7, v1, v5
1394; CGP-NEXT:    v_mul_hi_u32 v8, v0, v5
1395; CGP-NEXT:    v_mul_hi_u32 v9, v1, v5
1396; CGP-NEXT:    v_mul_lo_u32 v10, v3, v5
1397; CGP-NEXT:    v_mul_hi_u32 v11, v2, v5
1398; CGP-NEXT:    v_mul_hi_u32 v5, v3, v5
1399; CGP-NEXT:    v_mul_lo_u32 v12, v0, v6
1400; CGP-NEXT:    v_mul_lo_u32 v13, v1, v6
1401; CGP-NEXT:    v_mul_hi_u32 v14, v0, v6
1402; CGP-NEXT:    v_mul_hi_u32 v15, v1, v6
1403; CGP-NEXT:    v_mul_lo_u32 v16, v2, v6
1404; CGP-NEXT:    v_mul_lo_u32 v17, v3, v6
1405; CGP-NEXT:    v_mul_hi_u32 v18, v2, v6
1406; CGP-NEXT:    v_mul_hi_u32 v6, v3, v6
1407; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
1408; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1409; CGP-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
1410; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1411; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v16
1412; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
1413; CGP-NEXT:    v_add_i32_e32 v5, vcc, v17, v5
1414; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
1415; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
1416; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
1417; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v14
1418; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1419; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
1420; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1421; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v18
1422; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1423; CGP-NEXT:    v_add_i32_e32 v7, vcc, v12, v7
1424; CGP-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
1425; CGP-NEXT:    v_add_i32_e32 v10, vcc, v16, v10
1426; CGP-NEXT:    v_add_i32_e32 v11, vcc, v17, v11
1427; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
1428; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1429; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
1430; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1431; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
1432; CGP-NEXT:    v_mul_lo_u32 v9, v7, v4
1433; CGP-NEXT:    v_mul_hi_u32 v7, v7, v4
1434; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
1435; CGP-NEXT:    v_mul_lo_u32 v11, v5, v4
1436; CGP-NEXT:    v_mul_hi_u32 v5, v5, v4
1437; CGP-NEXT:    v_add_i32_e32 v8, vcc, v15, v8
1438; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
1439; CGP-NEXT:    v_mul_lo_u32 v8, v8, v4
1440; CGP-NEXT:    v_mul_lo_u32 v6, v6, v4
1441; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
1442; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
1443; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v9
1444; CGP-NEXT:    v_subb_u32_e64 v6, vcc, v1, v7, s[4:5]
1445; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
1446; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
1447; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
1448; CGP-NEXT:    v_sub_i32_e64 v2, s[6:7], v2, v11
1449; CGP-NEXT:    v_subb_u32_e64 v8, vcc, v3, v5, s[6:7]
1450; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v3, v5
1451; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
1452; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
1453; CGP-NEXT:    v_sub_i32_e32 v9, vcc, v2, v4
1454; CGP-NEXT:    v_cmp_eq_u32_e64 s[8:9], 0, v6
1455; CGP-NEXT:    v_cndmask_b32_e64 v7, -1, v7, s[8:9]
1456; CGP-NEXT:    v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
1457; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v8
1458; CGP-NEXT:    v_cndmask_b32_e64 v5, -1, v5, s[4:5]
1459; CGP-NEXT:    v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[6:7]
1460; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v4
1461; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
1462; CGP-NEXT:    s_mov_b64 s[4:5], vcc
1463; CGP-NEXT:    v_subrev_i32_e32 v11, vcc, 0x12d8fb, v9
1464; CGP-NEXT:    v_sub_i32_e64 v12, s[6:7], v0, v4
1465; CGP-NEXT:    v_subbrev_u32_e64 v1, s[6:7], 0, v1, s[6:7]
1466; CGP-NEXT:    v_cmp_ge_u32_e64 s[6:7], v12, v4
1467; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[6:7]
1468; CGP-NEXT:    v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[4:5]
1469; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v1
1470; CGP-NEXT:    v_cndmask_b32_e64 v13, -1, v13, s[4:5]
1471; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v12, v4
1472; CGP-NEXT:    v_subbrev_u32_e64 v14, s[4:5], 0, v1, s[4:5]
1473; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
1474; CGP-NEXT:    v_cndmask_b32_e64 v10, -1, v10, s[4:5]
1475; CGP-NEXT:    v_subbrev_u32_e32 v15, vcc, 0, v3, vcc
1476; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
1477; CGP-NEXT:    v_cndmask_b32_e32 v4, v12, v4, vcc
1478; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v10
1479; CGP-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s[4:5]
1480; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v14, vcc
1481; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
1482; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1483; CGP-NEXT:    v_cndmask_b32_e64 v3, v3, v15, s[4:5]
1484; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v5
1485; CGP-NEXT:    v_cndmask_b32_e64 v2, v2, v9, s[4:5]
1486; CGP-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
1487; CGP-NEXT:    v_cndmask_b32_e64 v3, v8, v3, s[4:5]
1488; CGP-NEXT:    s_setpc_b64 s[30:31]
1489  %result = urem <2 x i64> %num, <i64 1235195, i64 1235195>
1490  ret <2 x i64> %result
1491}
1492
; Test: 64-bit unsigned remainder whose denominator is the constant 4096
; shifted left by a runtime amount (%shl.y = shl i64 4096, %y).
;
; The assertions below use the shared CHECK prefix, so both RUN lines at the
; top of the file (idiv expansion in AMDGPUCodeGenPrepare disabled and
; enabled) are expected to emit the same code for this function.
;
; Shape of the expected output:
;  * The upper 32 bits of the numerator and of the shifted denominator are
;    OR'd together and compared against zero; exec is split on the result.
;  * .LBB7_3 (lanes where that OR is non-zero) is the long 64-bit sequence:
;    a v_rcp_iflag_f32 based estimate, mul_lo/mul_hi/add-with-carry
;    refinement steps, then compare/select fix-ups of the remainder.
;  * .LBB7_4 (remaining lanes) is the short sequence that works on the low
;    32 bits only and writes 0 to v1 (upper result register).
;
; The assertion lines are autogenerated by utils/update_llc_test_checks.py
; (see the NOTE on the first line of the file); regenerate them rather than
; editing them by hand.
1493define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) {
1494; CHECK-LABEL: v_urem_i64_pow2_shl_denom:
1495; CHECK:       ; %bb.0:
1496; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1497; CHECK-NEXT:    v_mov_b32_e32 v3, v0
1498; CHECK-NEXT:    v_mov_b32_e32 v4, v1
1499; CHECK-NEXT:    v_mov_b32_e32 v0, 0x1000
1500; CHECK-NEXT:    v_mov_b32_e32 v1, 0
1501; CHECK-NEXT:    v_mov_b32_e32 v7, 0
1502; CHECK-NEXT:    v_lshl_b64 v[5:6], v[0:1], v2
1503; CHECK-NEXT:    v_or_b32_e32 v8, v4, v6
1504; CHECK-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[7:8]
1505; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, v5
1506; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
1507; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1508; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
1509; CHECK-NEXT:    s_cbranch_execnz .LBB7_3
1510; CHECK-NEXT:  ; %bb.1: ; %Flow
1511; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
1512; CHECK-NEXT:    s_cbranch_execnz .LBB7_4
1513; CHECK-NEXT:  .LBB7_2:
1514; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
1515; CHECK-NEXT:    s_setpc_b64 s[30:31]
1516; CHECK-NEXT:  .LBB7_3:
1517; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, v6
1518; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, 0, v5
1519; CHECK-NEXT:    v_subb_u32_e32 v7, vcc, 0, v6, vcc
1520; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v0
1521; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v2
1522; CHECK-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
1523; CHECK-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v0
1524; CHECK-NEXT:    v_trunc_f32_e32 v2, v2
1525; CHECK-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v2
1526; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
1527; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
1528; CHECK-NEXT:    v_mul_lo_u32 v8, v1, v2
1529; CHECK-NEXT:    v_mul_lo_u32 v9, v1, v0
1530; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v0
1531; CHECK-NEXT:    v_mul_hi_u32 v11, v1, v0
1532; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
1533; CHECK-NEXT:    v_mul_lo_u32 v10, v2, v9
1534; CHECK-NEXT:    v_mul_hi_u32 v12, v0, v9
1535; CHECK-NEXT:    v_mul_hi_u32 v9, v2, v9
1536; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
1537; CHECK-NEXT:    v_mul_lo_u32 v11, v0, v8
1538; CHECK-NEXT:    v_mul_lo_u32 v13, v2, v8
1539; CHECK-NEXT:    v_mul_hi_u32 v14, v0, v8
1540; CHECK-NEXT:    v_mul_hi_u32 v8, v2, v8
1541; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
1542; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1543; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
1544; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1545; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
1546; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1547; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
1548; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1549; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
1550; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v13, v12
1551; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
1552; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1553; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
1554; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
1555; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v9
1556; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v8, vcc
1557; CHECK-NEXT:    v_mul_lo_u32 v8, v1, v0
1558; CHECK-NEXT:    v_mul_lo_u32 v7, v7, v0
1559; CHECK-NEXT:    v_mul_hi_u32 v9, v1, v0
1560; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v2
1561; CHECK-NEXT:    v_mul_lo_u32 v10, v2, v8
1562; CHECK-NEXT:    v_mul_hi_u32 v11, v0, v8
1563; CHECK-NEXT:    v_mul_hi_u32 v8, v2, v8
1564; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v7, v1
1565; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v9
1566; CHECK-NEXT:    v_mul_lo_u32 v7, v0, v1
1567; CHECK-NEXT:    v_mul_lo_u32 v9, v2, v1
1568; CHECK-NEXT:    v_mul_hi_u32 v12, v0, v1
1569; CHECK-NEXT:    v_mul_hi_u32 v1, v2, v1
1570; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
1571; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1572; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
1573; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1574; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v7, v11
1575; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
1576; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
1577; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1578; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
1579; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
1580; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
1581; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1582; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
1583; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v8
1584; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
1585; CHECK-NEXT:    v_addc_u32_e32 v1, vcc, v2, v1, vcc
1586; CHECK-NEXT:    v_mul_lo_u32 v2, v4, v0
1587; CHECK-NEXT:    v_mul_hi_u32 v7, v3, v0
1588; CHECK-NEXT:    v_mul_hi_u32 v0, v4, v0
1589; CHECK-NEXT:    v_mul_lo_u32 v8, v3, v1
1590; CHECK-NEXT:    v_mul_lo_u32 v9, v4, v1
1591; CHECK-NEXT:    v_mul_hi_u32 v10, v3, v1
1592; CHECK-NEXT:    v_mul_hi_u32 v1, v4, v1
1593; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v8
1594; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1595; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v9, v0
1596; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1597; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
1598; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
1599; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
1600; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
1601; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v8, v2
1602; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
1603; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
1604; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
1605; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
1606; CHECK-NEXT:    v_mul_lo_u32 v7, v5, v0
1607; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v0
1608; CHECK-NEXT:    v_mul_hi_u32 v0, v5, v0
1609; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
1610; CHECK-NEXT:    v_mul_lo_u32 v1, v5, v1
1611; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
1612; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
1613; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v3, v7
1614; CHECK-NEXT:    v_subb_u32_e64 v2, s[4:5], v4, v0, vcc
1615; CHECK-NEXT:    v_sub_i32_e64 v0, s[4:5], v4, v0
1616; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v5
1617; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, -1, s[4:5]
1618; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v6
1619; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
1620; CHECK-NEXT:    v_subb_u32_e32 v0, vcc, v0, v6, vcc
1621; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v6
1622; CHECK-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
1623; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, v1, v5
1624; CHECK-NEXT:    v_subbrev_u32_e64 v7, s[4:5], 0, v0, vcc
1625; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v5
1626; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
1627; CHECK-NEXT:    v_subb_u32_e32 v0, vcc, v0, v6, vcc
1628; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v6
1629; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
1630; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, v4, v5
1631; CHECK-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
1632; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v7, v6
1633; CHECK-NEXT:    v_cndmask_b32_e32 v6, v9, v8, vcc
1634; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
1635; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
1636; CHECK-NEXT:    v_cndmask_b32_e32 v5, v7, v0, vcc
1637; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
1638; CHECK-NEXT:    v_cndmask_b32_e32 v0, v1, v4, vcc
1639; CHECK-NEXT:    v_cndmask_b32_e32 v1, v2, v5, vcc
1640; CHECK-NEXT:    ; implicit-def: $vgpr2
1641; CHECK-NEXT:    ; implicit-def: $vgpr5_vgpr6
1642; CHECK-NEXT:    ; implicit-def: $vgpr3
1643; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
1644; CHECK-NEXT:    s_cbranch_execz .LBB7_2
1645; CHECK-NEXT:  .LBB7_4:
1646; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v2
1647; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, 0, v5
1648; CHECK-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1649; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
1650; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v0
1651; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
1652; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
1653; CHECK-NEXT:    v_mul_hi_u32 v0, v3, v0
1654; CHECK-NEXT:    v_mul_lo_u32 v0, v0, v5
1655; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v3, v0
1656; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v0, v5
1657; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
1658; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1659; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v0, v5
1660; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
1661; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1662; CHECK-NEXT:    v_mov_b32_e32 v1, 0
1663; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
1664; CHECK-NEXT:    s_setpc_b64 s[30:31]
1665  %shl.y = shl i64 4096, %y
1666  %r = urem i64 %x, %shl.y
1667  ret i64 %r
1668}
1669
1670define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
1671; GISEL-LABEL: v_urem_v2i64_pow2_shl_denom:
1672; GISEL:       ; %bb.0:
1673; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1674; GISEL-NEXT:    v_mov_b32_e32 v9, 0x1000
1675; GISEL-NEXT:    v_mov_b32_e32 v10, 0
1676; GISEL-NEXT:    v_lshl_b64 v[7:8], v[9:10], v4
1677; GISEL-NEXT:    v_lshl_b64 v[4:5], v[9:10], v6
1678; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v7
1679; GISEL-NEXT:    v_cvt_f32_u32_e32 v9, v8
1680; GISEL-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v9
1681; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
1682; GISEL-NEXT:    v_mul_f32_e32 v6, 0x5f7ffffc, v6
1683; GISEL-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v6
1684; GISEL-NEXT:    v_trunc_f32_e32 v9, v9
1685; GISEL-NEXT:    v_mac_f32_e32 v6, 0xcf800000, v9
1686; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
1687; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v9
1688; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, 0, v7
1689; GISEL-NEXT:    v_subb_u32_e32 v11, vcc, 0, v8, vcc
1690; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v6
1691; GISEL-NEXT:    v_mul_lo_u32 v13, v11, v6
1692; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v9
1693; GISEL-NEXT:    v_mul_hi_u32 v15, v10, v6
1694; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
1695; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
1696; GISEL-NEXT:    v_mul_lo_u32 v14, v9, v12
1697; GISEL-NEXT:    v_mul_lo_u32 v15, v6, v13
1698; GISEL-NEXT:    v_mul_hi_u32 v16, v6, v12
1699; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
1700; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1701; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
1702; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1703; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
1704; GISEL-NEXT:    v_mul_lo_u32 v15, v9, v13
1705; GISEL-NEXT:    v_mul_hi_u32 v12, v9, v12
1706; GISEL-NEXT:    v_mul_hi_u32 v16, v6, v13
1707; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
1708; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1709; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
1710; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
1711; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
1712; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
1713; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1714; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
1715; GISEL-NEXT:    v_mul_hi_u32 v13, v9, v13
1716; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
1717; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v12
1718; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v13, vcc
1719; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v6
1720; GISEL-NEXT:    v_mul_lo_u32 v11, v11, v6
1721; GISEL-NEXT:    v_mul_lo_u32 v13, v10, v9
1722; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v6
1723; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
1724; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
1725; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v12
1726; GISEL-NEXT:    v_mul_lo_u32 v13, v6, v10
1727; GISEL-NEXT:    v_mul_hi_u32 v14, v6, v12
1728; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
1729; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1730; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v14
1731; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1732; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
1733; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v10
1734; GISEL-NEXT:    v_mul_hi_u32 v12, v9, v12
1735; GISEL-NEXT:    v_mul_hi_u32 v14, v6, v10
1736; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
1737; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1738; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
1739; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1740; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
1741; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1742; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1743; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
1744; GISEL-NEXT:    v_mul_hi_u32 v10, v9, v10
1745; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
1746; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v11
1747; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v10, vcc
1748; GISEL-NEXT:    v_mul_lo_u32 v10, v1, v6
1749; GISEL-NEXT:    v_mul_lo_u32 v11, v0, v9
1750; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v6
1751; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
1752; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1753; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
1754; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1755; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
1756; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v9
1757; GISEL-NEXT:    v_mul_hi_u32 v6, v1, v6
1758; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v9
1759; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v11, v6
1760; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1761; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v12
1762; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1763; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
1764; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
1765; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1766; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
1767; GISEL-NEXT:    v_mul_hi_u32 v9, v1, v9
1768; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
1769; GISEL-NEXT:    v_mul_lo_u32 v10, v7, v6
1770; GISEL-NEXT:    v_mul_lo_u32 v11, v8, v6
1771; GISEL-NEXT:    v_mul_lo_u32 v9, v7, v9
1772; GISEL-NEXT:    v_mul_hi_u32 v6, v7, v6
1773; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
1774; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v9, v6
1775; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
1776; GISEL-NEXT:    v_subb_u32_e64 v9, s[4:5], v1, v6, vcc
1777; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v6
1778; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v8
1779; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
1780; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v7
1781; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
1782; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v9, v8
1783; GISEL-NEXT:    v_cndmask_b32_e64 v6, v6, v10, s[4:5]
1784; GISEL-NEXT:    v_sub_i32_e64 v10, s[4:5], v0, v7
1785; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v8, vcc
1786; GISEL-NEXT:    v_subbrev_u32_e64 v11, vcc, 0, v1, s[4:5]
1787; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v11, v8
1788; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
1789; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v7
1790; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
1791; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v11, v8
1792; GISEL-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
1793; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v10, v7
1794; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], v1, v8, s[4:5]
1795; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1796; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
1797; GISEL-NEXT:    v_cndmask_b32_e32 v7, v10, v7, vcc
1798; GISEL-NEXT:    v_cndmask_b32_e32 v1, v11, v1, vcc
1799; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
1800; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
1801; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
1802; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v4
1803; GISEL-NEXT:    v_cvt_f32_u32_e32 v7, v5
1804; GISEL-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v7
1805; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
1806; GISEL-NEXT:    v_mul_f32_e32 v6, 0x5f7ffffc, v6
1807; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v6
1808; GISEL-NEXT:    v_trunc_f32_e32 v7, v7
1809; GISEL-NEXT:    v_mac_f32_e32 v6, 0xcf800000, v7
1810; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
1811; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
1812; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, 0, v4
1813; GISEL-NEXT:    v_subb_u32_e32 v9, vcc, 0, v5, vcc
1814; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v6
1815; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v6
1816; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v7
1817; GISEL-NEXT:    v_mul_hi_u32 v13, v8, v6
1818; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
1819; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
1820; GISEL-NEXT:    v_mul_lo_u32 v12, v7, v10
1821; GISEL-NEXT:    v_mul_lo_u32 v13, v6, v11
1822; GISEL-NEXT:    v_mul_hi_u32 v14, v6, v10
1823; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
1824; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1825; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
1826; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1827; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
1828; GISEL-NEXT:    v_mul_lo_u32 v13, v7, v11
1829; GISEL-NEXT:    v_mul_hi_u32 v10, v7, v10
1830; GISEL-NEXT:    v_mul_hi_u32 v14, v6, v11
1831; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
1832; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1833; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
1834; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1835; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
1836; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
1837; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1838; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
1839; GISEL-NEXT:    v_mul_hi_u32 v11, v7, v11
1840; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
1841; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
1842; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, v7, v11, vcc
1843; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v6
1844; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v6
1845; GISEL-NEXT:    v_mul_lo_u32 v11, v8, v7
1846; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v6
1847; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
1848; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
1849; GISEL-NEXT:    v_mul_lo_u32 v9, v7, v10
1850; GISEL-NEXT:    v_mul_lo_u32 v11, v6, v8
1851; GISEL-NEXT:    v_mul_hi_u32 v12, v6, v10
1852; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
1853; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1854; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
1855; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1856; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
1857; GISEL-NEXT:    v_mul_lo_u32 v11, v7, v8
1858; GISEL-NEXT:    v_mul_hi_u32 v10, v7, v10
1859; GISEL-NEXT:    v_mul_hi_u32 v12, v6, v8
1860; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
1861; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1862; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
1863; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1864; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
1865; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
1866; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1867; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
1868; GISEL-NEXT:    v_mul_hi_u32 v8, v7, v8
1869; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
1870; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
1871; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, v7, v8, vcc
1872; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v6
1873; GISEL-NEXT:    v_mul_lo_u32 v9, v2, v7
1874; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v6
1875; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
1876; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1877; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
1878; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1879; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
1880; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v7
1881; GISEL-NEXT:    v_mul_hi_u32 v6, v3, v6
1882; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v7
1883; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v9, v6
1884; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1885; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
1886; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1887; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
1888; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
1889; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1890; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
1891; GISEL-NEXT:    v_mul_hi_u32 v7, v3, v7
1892; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
1893; GISEL-NEXT:    v_mul_lo_u32 v8, v4, v6
1894; GISEL-NEXT:    v_mul_lo_u32 v9, v5, v6
1895; GISEL-NEXT:    v_mul_lo_u32 v7, v4, v7
1896; GISEL-NEXT:    v_mul_hi_u32 v6, v4, v6
1897; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
1898; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
1899; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
1900; GISEL-NEXT:    v_subb_u32_e64 v7, s[4:5], v3, v6, vcc
1901; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v6
1902; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v5
1903; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
1904; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v4
1905; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
1906; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v7, v5
1907; GISEL-NEXT:    v_cndmask_b32_e64 v6, v6, v8, s[4:5]
1908; GISEL-NEXT:    v_sub_i32_e64 v8, s[4:5], v2, v4
1909; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v5, vcc
1910; GISEL-NEXT:    v_subbrev_u32_e64 v9, vcc, 0, v3, s[4:5]
1911; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v5
1912; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
1913; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v4
1914; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
1915; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v5
1916; GISEL-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
1917; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v8, v4
1918; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v5, s[4:5]
1919; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
1920; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
1921; GISEL-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
1922; GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
1923; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
1924; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
1925; GISEL-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc
1926; GISEL-NEXT:    s_setpc_b64 s[30:31]
1927;
1928; CGP-LABEL: v_urem_v2i64_pow2_shl_denom:
1929; CGP:       ; %bb.0:
1930; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1931; CGP-NEXT:    v_mov_b32_e32 v8, v0
1932; CGP-NEXT:    v_mov_b32_e32 v9, v1
1933; CGP-NEXT:    v_mov_b32_e32 v5, v2
1934; CGP-NEXT:    v_mov_b32_e32 v7, v3
1935; CGP-NEXT:    v_mov_b32_e32 v10, 0x1000
1936; CGP-NEXT:    v_mov_b32_e32 v11, 0
1937; CGP-NEXT:    v_mov_b32_e32 v0, 0
1938; CGP-NEXT:    v_lshl_b64 v[2:3], v[10:11], v4
1939; CGP-NEXT:    v_or_b32_e32 v1, v9, v3
1940; CGP-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
1941; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
1942; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
1943; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1944; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
1945; CGP-NEXT:    s_cbranch_execz .LBB8_2
1946; CGP-NEXT:  ; %bb.1:
1947; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v3
1948; CGP-NEXT:    v_sub_i32_e32 v1, vcc, 0, v2
1949; CGP-NEXT:    v_subb_u32_e32 v12, vcc, 0, v3, vcc
1950; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v0
1951; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v4
1952; CGP-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
1953; CGP-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v0
1954; CGP-NEXT:    v_trunc_f32_e32 v4, v4
1955; CGP-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v4
1956; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
1957; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
1958; CGP-NEXT:    v_mul_lo_u32 v13, v1, v4
1959; CGP-NEXT:    v_mul_lo_u32 v14, v1, v0
1960; CGP-NEXT:    v_mul_lo_u32 v15, v12, v0
1961; CGP-NEXT:    v_mul_hi_u32 v16, v1, v0
1962; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v13
1963; CGP-NEXT:    v_mul_lo_u32 v15, v4, v14
1964; CGP-NEXT:    v_mul_hi_u32 v17, v0, v14
1965; CGP-NEXT:    v_mul_hi_u32 v14, v4, v14
1966; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
1967; CGP-NEXT:    v_mul_lo_u32 v16, v0, v13
1968; CGP-NEXT:    v_mul_lo_u32 v18, v4, v13
1969; CGP-NEXT:    v_mul_hi_u32 v19, v0, v13
1970; CGP-NEXT:    v_mul_hi_u32 v13, v4, v13
1971; CGP-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
1972; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
1973; CGP-NEXT:    v_add_i32_e32 v14, vcc, v18, v14
1974; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
1975; CGP-NEXT:    v_add_i32_e32 v15, vcc, v15, v17
1976; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1977; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
1978; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
1979; CGP-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
1980; CGP-NEXT:    v_add_i32_e32 v16, vcc, v18, v17
1981; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
1982; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1983; CGP-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
1984; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
1985; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
1986; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v4, v13, vcc
1987; CGP-NEXT:    v_mul_lo_u32 v13, v1, v0
1988; CGP-NEXT:    v_mul_lo_u32 v12, v12, v0
1989; CGP-NEXT:    v_mul_hi_u32 v14, v1, v0
1990; CGP-NEXT:    v_mul_lo_u32 v1, v1, v4
1991; CGP-NEXT:    v_mul_lo_u32 v15, v4, v13
1992; CGP-NEXT:    v_mul_hi_u32 v16, v0, v13
1993; CGP-NEXT:    v_mul_hi_u32 v13, v4, v13
1994; CGP-NEXT:    v_add_i32_e32 v1, vcc, v12, v1
1995; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v14
1996; CGP-NEXT:    v_mul_lo_u32 v12, v0, v1
1997; CGP-NEXT:    v_mul_lo_u32 v14, v4, v1
1998; CGP-NEXT:    v_mul_hi_u32 v17, v0, v1
1999; CGP-NEXT:    v_mul_hi_u32 v1, v4, v1
2000; CGP-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
2001; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
2002; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
2003; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2004; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
2005; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2006; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
2007; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
2008; CGP-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
2009; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
2010; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
2011; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
2012; CGP-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
2013; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v13
2014; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v12
2015; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v4, v1, vcc
2016; CGP-NEXT:    v_mul_lo_u32 v4, v9, v0
2017; CGP-NEXT:    v_mul_hi_u32 v12, v8, v0
2018; CGP-NEXT:    v_mul_hi_u32 v0, v9, v0
2019; CGP-NEXT:    v_mul_lo_u32 v13, v8, v1
2020; CGP-NEXT:    v_mul_lo_u32 v14, v9, v1
2021; CGP-NEXT:    v_mul_hi_u32 v15, v8, v1
2022; CGP-NEXT:    v_mul_hi_u32 v1, v9, v1
2023; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v13
2024; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
2025; CGP-NEXT:    v_add_i32_e32 v0, vcc, v14, v0
2026; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2027; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
2028; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
2029; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v15
2030; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2031; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
2032; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
2033; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
2034; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
2035; CGP-NEXT:    v_add_i32_e32 v4, vcc, v12, v4
2036; CGP-NEXT:    v_mul_lo_u32 v12, v2, v0
2037; CGP-NEXT:    v_mul_lo_u32 v13, v3, v0
2038; CGP-NEXT:    v_mul_hi_u32 v0, v2, v0
2039; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
2040; CGP-NEXT:    v_mul_lo_u32 v1, v2, v1
2041; CGP-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
2042; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
2043; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v8, v12
2044; CGP-NEXT:    v_subb_u32_e64 v4, s[4:5], v9, v0, vcc
2045; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v9, v0
2046; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v2
2047; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
2048; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v3
2049; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
2050; CGP-NEXT:    v_subb_u32_e32 v0, vcc, v0, v3, vcc
2051; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v3
2052; CGP-NEXT:    v_cndmask_b32_e32 v8, v9, v8, vcc
2053; CGP-NEXT:    v_sub_i32_e32 v9, vcc, v1, v2
2054; CGP-NEXT:    v_subbrev_u32_e64 v12, s[4:5], 0, v0, vcc
2055; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v2
2056; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
2057; CGP-NEXT:    v_subb_u32_e32 v0, vcc, v0, v3, vcc
2058; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v12, v3
2059; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, -1, vcc
2060; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v9, v2
2061; CGP-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
2062; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v12, v3
2063; CGP-NEXT:    v_cndmask_b32_e32 v3, v14, v13, vcc
2064; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
2065; CGP-NEXT:    v_cndmask_b32_e32 v2, v9, v2, vcc
2066; CGP-NEXT:    v_cndmask_b32_e32 v3, v12, v0, vcc
2067; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
2068; CGP-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
2069; CGP-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
2070; CGP-NEXT:    ; implicit-def: $vgpr4
2071; CGP-NEXT:    ; implicit-def: $vgpr2_vgpr3
2072; CGP-NEXT:    ; implicit-def: $vgpr8
2073; CGP-NEXT:  .LBB8_2: ; %Flow1
2074; CGP-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
2075; CGP-NEXT:    v_lshl_b64 v[9:10], v[10:11], v6
2076; CGP-NEXT:    s_xor_b64 exec, exec, s[4:5]
2077; CGP-NEXT:    s_cbranch_execz .LBB8_4
2078; CGP-NEXT:  ; %bb.3:
2079; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v4
2080; CGP-NEXT:    v_sub_i32_e32 v1, vcc, 0, v2
2081; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
2082; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
2083; CGP-NEXT:    v_mul_lo_u32 v1, v1, v0
2084; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
2085; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
2086; CGP-NEXT:    v_mul_hi_u32 v0, v8, v0
2087; CGP-NEXT:    v_mul_lo_u32 v0, v0, v2
2088; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v8, v0
2089; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v0, v2
2090; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
2091; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2092; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v0, v2
2093; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
2094; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2095; CGP-NEXT:    v_mov_b32_e32 v1, 0
2096; CGP-NEXT:  .LBB8_4:
2097; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
2098; CGP-NEXT:    v_or_b32_e32 v3, v7, v10
2099; CGP-NEXT:    v_mov_b32_e32 v2, 0
2100; CGP-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
2101; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v9
2102; CGP-NEXT:    ; implicit-def: $vgpr2_vgpr3
2103; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
2104; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
2105; CGP-NEXT:    s_cbranch_execnz .LBB8_7
2106; CGP-NEXT:  ; %bb.5: ; %Flow
2107; CGP-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
2108; CGP-NEXT:    s_cbranch_execnz .LBB8_8
2109; CGP-NEXT:  .LBB8_6:
2110; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
2111; CGP-NEXT:    s_setpc_b64 s[30:31]
2112; CGP-NEXT:  .LBB8_7:
2113; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v10
2114; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v9
2115; CGP-NEXT:    v_subb_u32_e32 v6, vcc, 0, v10, vcc
2116; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v2
2117; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, v4
2118; CGP-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
2119; CGP-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v2
2120; CGP-NEXT:    v_trunc_f32_e32 v4, v4
2121; CGP-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v4
2122; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
2123; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
2124; CGP-NEXT:    v_mul_lo_u32 v8, v3, v4
2125; CGP-NEXT:    v_mul_lo_u32 v11, v3, v2
2126; CGP-NEXT:    v_mul_lo_u32 v12, v6, v2
2127; CGP-NEXT:    v_mul_hi_u32 v13, v3, v2
2128; CGP-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
2129; CGP-NEXT:    v_mul_lo_u32 v12, v4, v11
2130; CGP-NEXT:    v_mul_hi_u32 v14, v2, v11
2131; CGP-NEXT:    v_mul_hi_u32 v11, v4, v11
2132; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v13
2133; CGP-NEXT:    v_mul_lo_u32 v13, v2, v8
2134; CGP-NEXT:    v_mul_lo_u32 v15, v4, v8
2135; CGP-NEXT:    v_mul_hi_u32 v16, v2, v8
2136; CGP-NEXT:    v_mul_hi_u32 v8, v4, v8
2137; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
2138; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
2139; CGP-NEXT:    v_add_i32_e32 v11, vcc, v15, v11
2140; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
2141; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
2142; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2143; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v16
2144; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2145; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
2146; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v14
2147; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
2148; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2149; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
2150; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
2151; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v11
2152; CGP-NEXT:    v_addc_u32_e32 v4, vcc, v4, v8, vcc
2153; CGP-NEXT:    v_mul_lo_u32 v8, v3, v2
2154; CGP-NEXT:    v_mul_lo_u32 v6, v6, v2
2155; CGP-NEXT:    v_mul_hi_u32 v11, v3, v2
2156; CGP-NEXT:    v_mul_lo_u32 v3, v3, v4
2157; CGP-NEXT:    v_mul_lo_u32 v12, v4, v8
2158; CGP-NEXT:    v_mul_hi_u32 v13, v2, v8
2159; CGP-NEXT:    v_mul_hi_u32 v8, v4, v8
2160; CGP-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
2161; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v11
2162; CGP-NEXT:    v_mul_lo_u32 v6, v2, v3
2163; CGP-NEXT:    v_mul_lo_u32 v11, v4, v3
2164; CGP-NEXT:    v_mul_hi_u32 v14, v2, v3
2165; CGP-NEXT:    v_mul_hi_u32 v3, v4, v3
2166; CGP-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
2167; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2168; CGP-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
2169; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
2170; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v13
2171; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2172; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v14
2173; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
2174; CGP-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
2175; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
2176; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
2177; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
2178; CGP-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
2179; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v8
2180; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
2181; CGP-NEXT:    v_addc_u32_e32 v3, vcc, v4, v3, vcc
2182; CGP-NEXT:    v_mul_lo_u32 v4, v7, v2
2183; CGP-NEXT:    v_mul_hi_u32 v6, v5, v2
2184; CGP-NEXT:    v_mul_hi_u32 v2, v7, v2
2185; CGP-NEXT:    v_mul_lo_u32 v8, v5, v3
2186; CGP-NEXT:    v_mul_lo_u32 v11, v7, v3
2187; CGP-NEXT:    v_mul_hi_u32 v12, v5, v3
2188; CGP-NEXT:    v_mul_hi_u32 v3, v7, v3
2189; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
2190; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
2191; CGP-NEXT:    v_add_i32_e32 v2, vcc, v11, v2
2192; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
2193; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
2194; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
2195; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v12
2196; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2197; CGP-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
2198; CGP-NEXT:    v_add_i32_e32 v6, vcc, v11, v6
2199; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
2200; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
2201; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
2202; CGP-NEXT:    v_mul_lo_u32 v6, v9, v2
2203; CGP-NEXT:    v_mul_lo_u32 v8, v10, v2
2204; CGP-NEXT:    v_mul_hi_u32 v2, v9, v2
2205; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
2206; CGP-NEXT:    v_mul_lo_u32 v3, v9, v3
2207; CGP-NEXT:    v_add_i32_e32 v3, vcc, v8, v3
2208; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
2209; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v5, v6
2210; CGP-NEXT:    v_subb_u32_e64 v4, s[4:5], v7, v2, vcc
2211; CGP-NEXT:    v_sub_i32_e64 v2, s[4:5], v7, v2
2212; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v9
2213; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
2214; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v10
2215; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
2216; CGP-NEXT:    v_subb_u32_e32 v2, vcc, v2, v10, vcc
2217; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v10
2218; CGP-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
2219; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v3, v9
2220; CGP-NEXT:    v_subbrev_u32_e64 v7, s[4:5], 0, v2, vcc
2221; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v9
2222; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
2223; CGP-NEXT:    v_subb_u32_e32 v2, vcc, v2, v10, vcc
2224; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v10
2225; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
2226; CGP-NEXT:    v_sub_i32_e32 v9, vcc, v6, v9
2227; CGP-NEXT:    v_subbrev_u32_e32 v2, vcc, 0, v2, vcc
2228; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v7, v10
2229; CGP-NEXT:    v_cndmask_b32_e32 v8, v11, v8, vcc
2230; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
2231; CGP-NEXT:    v_cndmask_b32_e32 v6, v6, v9, vcc
2232; CGP-NEXT:    v_cndmask_b32_e32 v7, v7, v2, vcc
2233; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
2234; CGP-NEXT:    v_cndmask_b32_e32 v2, v3, v6, vcc
2235; CGP-NEXT:    v_cndmask_b32_e32 v3, v4, v7, vcc
2236; CGP-NEXT:    ; implicit-def: $vgpr4
2237; CGP-NEXT:    ; implicit-def: $vgpr9_vgpr10
2238; CGP-NEXT:    ; implicit-def: $vgpr5
2239; CGP-NEXT:    s_andn2_saveexec_b64 s[4:5], s[6:7]
2240; CGP-NEXT:    s_cbranch_execz .LBB8_6
2241; CGP-NEXT:  .LBB8_8:
2242; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, v4
2243; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v9
2244; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
2245; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
2246; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
2247; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
2248; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
2249; CGP-NEXT:    v_mul_hi_u32 v2, v5, v2
2250; CGP-NEXT:    v_mul_lo_u32 v2, v2, v9
2251; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v5, v2
2252; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v2, v9
2253; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v9
2254; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
2255; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v2, v9
2256; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v9
2257; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
2258; CGP-NEXT:    v_mov_b32_e32 v3, 0
2259; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
2260; CGP-NEXT:    s_setpc_b64 s[30:31]
2261  %shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y
2262  %r = urem <2 x i64> %x, %shl.y
2263  ret <2 x i64> %r
2264}
2265
; Scalar i64 urem where both operands are first masked with 16777215
; (0xffffff), so only the low 24 bits of %num and %den reach the urem.
;
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
;
; What the two prefixes currently expect:
;  * GISEL (the llc invocation with idiv expansion disabled in
;    AMDGPUCodeGenPrepare): a 32-bit sequence on the masked low halves built
;    from v_rcp_iflag_f32, a multiply by 0x4f7ffffe, mul_lo/mul_hi steps and
;    two compare + conditional-subtract steps; v1 (high half of the result)
;    is written with 0.
;  * CGP (the llc invocation with that expansion enabled): a float sequence
;    built from v_rcp_f32, v_trunc_f32, v_mad_f32 and an |x| >= den compare
;    that adds 0 or 1 to the truncated quotient; the product is subtracted
;    from the numerator, the result is masked with 0xffffff again and v1 is
;    written with 0.
2266define i64 @v_urem_i64_24bit(i64 %num, i64 %den) {
2267; GISEL-LABEL: v_urem_i64_24bit:
2268; GISEL:       ; %bb.0:
2269; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2270; GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
2271; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v2
2272; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, v1
2273; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
2274; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
2275; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
2276; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
2277; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v2
2278; GISEL-NEXT:    v_mul_hi_u32 v3, v2, v3
2279; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
2280; GISEL-NEXT:    v_mul_hi_u32 v2, v0, v2
2281; GISEL-NEXT:    v_mul_lo_u32 v2, v2, v1
2282; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
2283; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
2284; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
2285; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2286; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
2287; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
2288; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2289; GISEL-NEXT:    v_mov_b32_e32 v1, 0
2290; GISEL-NEXT:    s_setpc_b64 s[30:31]
2291;
2292; CGP-LABEL: v_urem_i64_24bit:
2293; CGP:       ; %bb.0:
2294; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2295; CGP-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
2296; CGP-NEXT:    v_and_b32_e32 v1, 0xffffff, v2
2297; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v0
2298; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v1
2299; CGP-NEXT:    v_rcp_f32_e32 v4, v3
2300; CGP-NEXT:    v_mul_f32_e32 v4, v2, v4
2301; CGP-NEXT:    v_trunc_f32_e32 v4, v4
2302; CGP-NEXT:    v_mad_f32 v2, -v4, v3, v2
2303; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
2304; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v2|, v3
2305; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[4:5]
2306; CGP-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
2307; CGP-NEXT:    v_mul_lo_u32 v1, v2, v1
2308; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
2309; CGP-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
2310; CGP-NEXT:    v_mov_b32_e32 v1, 0
2311; CGP-NEXT:    s_setpc_b64 s[30:31]
2312  %num.mask = and i64 %num, 16777215
2313  %den.mask = and i64 %den, 16777215
2314  %result = urem i64 %num.mask, %den.mask
2315  ret i64 %result
2316}
2317
; Vector (<2 x i64>) urem where every lane of both operands is first masked
; with 16777215 (0xffffff), so only the low 24 bits of each element of %num
; and %den reach the urem.
;
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
;
; What the two prefixes currently expect:
;  * GISEL (the llc invocation with idiv expansion disabled in
;    AMDGPUCodeGenPrepare): a long multiply / add-with-carry sequence for both
;    lanes, using the same 0x4f800000 / 0x5f7ffffc / 0x2f800000 / 0xcf800000
;    float constants that appear in the v_urem_i64 checks at the top of the
;    file. The high halves of the masked operands show up as literal 0
;    operands (e.g. "v_mul_lo_u32 v4, 0, v8", "v_subb_u32_e64 ..., 0, 0, vcc").
;    NOTE(review): presumably the known-24-bit narrowing seen in the scalar
;    v_urem_i64_24bit GISEL checks is not applied to the vector form in this
;    configuration - confirm before treating this output as intentional.
;  * CGP (the llc invocation with that expansion enabled): per lane, the short
;    float sequence (v_rcp_f32, v_trunc_f32, v_mad_f32, |x| >= den compare
;    adding 0 or 1 to the quotient, multiply back and subtract); both low
;    results are masked with 0xffffff and the high halves v1 and v3 are
;    written with 0.
2318define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
2319; GISEL-LABEL: v_urem_v2i64_24bit:
2320; GISEL:       ; %bb.0:
2321; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2322; GISEL-NEXT:    v_and_b32_e32 v3, 0xffffff, v4
2323; GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v6
2324; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v6, 0
2325; GISEL-NEXT:    v_cvt_f32_u32_e32 v7, v3
2326; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, 0, v3
2327; GISEL-NEXT:    v_subb_u32_e64 v5, s[4:5], 0, 0, vcc
2328; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v1
2329; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v1
2330; GISEL-NEXT:    v_subb_u32_e64 v10, s[4:5], 0, 0, vcc
2331; GISEL-NEXT:    v_mac_f32_e32 v7, 0x4f800000, v6
2332; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v6
2333; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v7
2334; GISEL-NEXT:    v_rcp_iflag_f32_e32 v7, v8
2335; GISEL-NEXT:    v_mul_f32_e32 v6, 0x5f7ffffc, v6
2336; GISEL-NEXT:    v_mul_f32_e32 v7, 0x5f7ffffc, v7
2337; GISEL-NEXT:    v_mul_f32_e32 v8, 0x2f800000, v6
2338; GISEL-NEXT:    v_mul_f32_e32 v11, 0x2f800000, v7
2339; GISEL-NEXT:    v_trunc_f32_e32 v8, v8
2340; GISEL-NEXT:    v_trunc_f32_e32 v11, v11
2341; GISEL-NEXT:    v_mac_f32_e32 v6, 0xcf800000, v8
2342; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
2343; GISEL-NEXT:    v_mac_f32_e32 v7, 0xcf800000, v11
2344; GISEL-NEXT:    v_cvt_u32_f32_e32 v11, v11
2345; GISEL-NEXT:    v_cvt_u32_f32_e32 v12, v6
2346; GISEL-NEXT:    v_mul_lo_u32 v6, v4, v8
2347; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
2348; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v11
2349; GISEL-NEXT:    v_mul_lo_u32 v14, v4, v12
2350; GISEL-NEXT:    v_mul_lo_u32 v15, v5, v12
2351; GISEL-NEXT:    v_mul_hi_u32 v16, v4, v12
2352; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v7
2353; GISEL-NEXT:    v_mul_lo_u32 v18, v10, v7
2354; GISEL-NEXT:    v_mul_hi_u32 v19, v9, v7
2355; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v15, v6
2356; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v18, v13
2357; GISEL-NEXT:    v_mul_lo_u32 v15, v11, v17
2358; GISEL-NEXT:    v_mul_hi_u32 v18, v7, v17
2359; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v19
2360; GISEL-NEXT:    v_mul_lo_u32 v19, v7, v13
2361; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v19
2362; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
2363; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v18
2364; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v14
2365; GISEL-NEXT:    v_mul_hi_u32 v18, v12, v14
2366; GISEL-NEXT:    v_mul_hi_u32 v14, v8, v14
2367; GISEL-NEXT:    v_mul_hi_u32 v17, v11, v17
2368; GISEL-NEXT:    v_add_i32_e64 v16, s[4:5], v6, v16
2369; GISEL-NEXT:    v_mul_lo_u32 v6, v12, v16
2370; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v15, v6
2371; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
2372; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v18
2373; GISEL-NEXT:    v_mul_lo_u32 v6, v8, v16
2374; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
2375; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v18
2376; GISEL-NEXT:    v_mul_hi_u32 v18, v12, v16
2377; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v14
2378; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
2379; GISEL-NEXT:    v_add_i32_e64 v18, s[4:5], v6, v18
2380; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
2381; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v6
2382; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2383; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v19, v6
2384; GISEL-NEXT:    v_mul_lo_u32 v6, v11, v13
2385; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v17
2386; GISEL-NEXT:    v_mul_hi_u32 v17, v7, v13
2387; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
2388; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v6, v17
2389; GISEL-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2390; GISEL-NEXT:    v_add_i32_e32 v20, vcc, v20, v6
2391; GISEL-NEXT:    v_and_b32_e32 v6, 0xffffff, v0
2392; GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v2
2393; GISEL-NEXT:    v_mul_hi_u32 v2, v8, v16
2394; GISEL-NEXT:    v_mul_hi_u32 v13, v11, v13
2395; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v18, v15
2396; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
2397; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v19
2398; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
2399; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
2400; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v20, v18
2401; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v14
2402; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
2403; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
2404; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v8, v2, vcc
2405; GISEL-NEXT:    v_mul_lo_u32 v8, v4, v12
2406; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v12
2407; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v12
2408; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v17
2409; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, v11, v13, vcc
2410; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v7
2411; GISEL-NEXT:    v_mul_lo_u32 v10, v10, v7
2412; GISEL-NEXT:    v_mul_hi_u32 v15, v9, v7
2413; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v2
2414; GISEL-NEXT:    v_mul_lo_u32 v16, v2, v8
2415; GISEL-NEXT:    v_mul_hi_u32 v17, v12, v8
2416; GISEL-NEXT:    v_mul_hi_u32 v8, v2, v8
2417; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v11
2418; GISEL-NEXT:    v_mul_lo_u32 v18, v11, v13
2419; GISEL-NEXT:    v_mul_hi_u32 v19, v7, v13
2420; GISEL-NEXT:    v_mul_hi_u32 v13, v11, v13
2421; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
2422; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v10, v9
2423; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
2424; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v15
2425; GISEL-NEXT:    v_mul_lo_u32 v9, v12, v4
2426; GISEL-NEXT:    v_mul_lo_u32 v10, v2, v4
2427; GISEL-NEXT:    v_mul_hi_u32 v14, v12, v4
2428; GISEL-NEXT:    v_mul_hi_u32 v4, v2, v4
2429; GISEL-NEXT:    v_mul_lo_u32 v15, v7, v5
2430; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v18, v15
2431; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
2432; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v19
2433; GISEL-NEXT:    v_mul_lo_u32 v15, v11, v5
2434; GISEL-NEXT:    v_mul_hi_u32 v19, v7, v5
2435; GISEL-NEXT:    v_mul_hi_u32 v5, v11, v5
2436; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v16, v9
2437; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
2438; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v10, v8
2439; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
2440; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v15, v13
2441; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
2442; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v17
2443; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
2444; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v14
2445; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
2446; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
2447; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v19
2448; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
2449; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v16, v9
2450; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
2451; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v18, v17
2452; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v19
2453; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
2454; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2455; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
2456; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2457; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
2458; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v15, v14
2459; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
2460; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
2461; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
2462; GISEL-NEXT:    v_addc_u32_e32 v2, vcc, v2, v4, vcc
2463; GISEL-NEXT:    v_mul_lo_u32 v4, 0, v8
2464; GISEL-NEXT:    v_mul_hi_u32 v9, v6, v8
2465; GISEL-NEXT:    v_mul_hi_u32 v8, 0, v8
2466; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v13
2467; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v11, v5, vcc
2468; GISEL-NEXT:    v_mul_lo_u32 v10, 0, v7
2469; GISEL-NEXT:    v_mul_hi_u32 v11, v0, v7
2470; GISEL-NEXT:    v_mul_hi_u32 v7, 0, v7
2471; GISEL-NEXT:    v_mul_lo_u32 v12, v6, v2
2472; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v2
2473; GISEL-NEXT:    v_mul_hi_u32 v14, v6, v2
2474; GISEL-NEXT:    v_mul_hi_u32 v2, 0, v2
2475; GISEL-NEXT:    v_mul_lo_u32 v15, v0, v5
2476; GISEL-NEXT:    v_mul_lo_u32 v16, 0, v5
2477; GISEL-NEXT:    v_mul_hi_u32 v17, v0, v5
2478; GISEL-NEXT:    v_mul_hi_u32 v5, 0, v5
2479; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
2480; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v13, v8
2481; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v15
2482; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v16, v7
2483; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
2484; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
2485; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v14
2486; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2487; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
2488; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2489; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v17
2490; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
2491; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
2492; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
2493; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
2494; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2495; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
2496; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v4
2497; GISEL-NEXT:    v_mul_lo_u32 v12, 0, v4
2498; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
2499; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
2500; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v7
2501; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v7
2502; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
2503; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v8
2504; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
2505; GISEL-NEXT:    v_mul_lo_u32 v2, v3, v2
2506; GISEL-NEXT:    v_mul_lo_u32 v5, v1, v5
2507; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v12, v2
2508; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v13, v5
2509; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
2510; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v7
2511; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v6, v9
2512; GISEL-NEXT:    v_subb_u32_e64 v6, s[4:5], 0, v2, vcc
2513; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], 0, v2
2514; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v3
2515; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
2516; GISEL-NEXT:    v_sub_i32_e64 v8, s[4:5], v0, v11
2517; GISEL-NEXT:    v_subb_u32_e64 v9, s[6:7], 0, v4, s[4:5]
2518; GISEL-NEXT:    v_sub_i32_e64 v0, s[6:7], 0, v4
2519; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v8, v1
2520; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[6:7]
2521; GISEL-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v6
2522; GISEL-NEXT:    v_cndmask_b32_e64 v7, -1, v7, s[6:7]
2523; GISEL-NEXT:    v_subbrev_u32_e32 v2, vcc, 0, v2, vcc
2524; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
2525; GISEL-NEXT:    v_cndmask_b32_e32 v4, -1, v4, vcc
2526; GISEL-NEXT:    v_subbrev_u32_e64 v0, vcc, 0, v0, s[4:5]
2527; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, v5, v3
2528; GISEL-NEXT:    v_subbrev_u32_e32 v2, vcc, 0, v2, vcc
2529; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v3
2530; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
2531; GISEL-NEXT:    v_sub_i32_e32 v12, vcc, v8, v1
2532; GISEL-NEXT:    v_subbrev_u32_e32 v13, vcc, 0, v0, vcc
2533; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v12, v1
2534; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
2535; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
2536; GISEL-NEXT:    v_cndmask_b32_e32 v11, -1, v11, vcc
2537; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v10, v3
2538; GISEL-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v2, vcc
2539; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v13
2540; GISEL-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
2541; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v12, v1
2542; GISEL-NEXT:    v_subbrev_u32_e32 v15, vcc, 0, v13, vcc
2543; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v11
2544; GISEL-NEXT:    v_cndmask_b32_e32 v3, v10, v3, vcc
2545; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
2546; GISEL-NEXT:    v_cndmask_b32_e64 v1, v12, v1, s[4:5]
2547; GISEL-NEXT:    v_cndmask_b32_e32 v10, v2, v14, vcc
2548; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
2549; GISEL-NEXT:    v_cndmask_b32_e32 v0, v5, v3, vcc
2550; GISEL-NEXT:    v_cndmask_b32_e64 v3, v13, v15, s[4:5]
2551; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v4
2552; GISEL-NEXT:    v_cndmask_b32_e64 v2, v8, v1, s[4:5]
2553; GISEL-NEXT:    v_cndmask_b32_e32 v1, v6, v10, vcc
2554; GISEL-NEXT:    v_cndmask_b32_e64 v3, v9, v3, s[4:5]
2555; GISEL-NEXT:    s_setpc_b64 s[30:31]
2556;
2557; CGP-LABEL: v_urem_v2i64_24bit:
2558; CGP:       ; %bb.0:
2559; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2560; CGP-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
2561; CGP-NEXT:    v_and_b32_e32 v1, 0xffffff, v4
2562; CGP-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2563; CGP-NEXT:    v_and_b32_e32 v3, 0xffffff, v6
2564; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v0
2565; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v1
2566; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v2
2567; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v3
2568; CGP-NEXT:    v_rcp_f32_e32 v8, v5
2569; CGP-NEXT:    v_rcp_f32_e32 v9, v7
2570; CGP-NEXT:    v_mul_f32_e32 v8, v4, v8
2571; CGP-NEXT:    v_mul_f32_e32 v9, v6, v9
2572; CGP-NEXT:    v_trunc_f32_e32 v8, v8
2573; CGP-NEXT:    v_trunc_f32_e32 v9, v9
2574; CGP-NEXT:    v_mad_f32 v4, -v8, v5, v4
2575; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v8
2576; CGP-NEXT:    v_mad_f32 v6, -v9, v7, v6
2577; CGP-NEXT:    v_cvt_u32_f32_e32 v9, v9
2578; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v4|, v5
2579; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
2580; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v6|, v7
2581; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s[4:5]
2582; CGP-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
2583; CGP-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
2584; CGP-NEXT:    v_mul_lo_u32 v1, v4, v1
2585; CGP-NEXT:    v_mul_lo_u32 v3, v5, v3
2586; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
2587; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v2, v3
2588; CGP-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
2589; CGP-NEXT:    v_and_b32_e32 v2, 0xffffff, v1
2590; CGP-NEXT:    v_mov_b32_e32 v1, 0
2591; CGP-NEXT:    v_mov_b32_e32 v3, 0
2592; CGP-NEXT:    s_setpc_b64 s[30:31]
2593  %num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215>
2594  %den.mask = and <2 x i64> %den, <i64 16777215, i64 16777215>
2595  %result = urem <2 x i64> %num.mask, %den.mask
2596  ret <2 x i64> %result
2597}
2598