xref: /llvm-project/llvm/test/CodeGen/AMDGPU/div_v2i128.ll (revision 6206f5444fc0732e6495703c75a67f1f90f5b418)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDAG %s
3; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -o - %s | FileCheck -check-prefix=GISEL %s
4
5define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
6; SDAG-LABEL: v_sdiv_v2i128_vv:
7; SDAG:       ; %bb.0: ; %_udiv-special-cases_udiv-special-cases
8; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9; SDAG-NEXT:    v_sub_i32_e32 v16, vcc, 0, v0
10; SDAG-NEXT:    v_mov_b32_e32 v18, 0
11; SDAG-NEXT:    v_ashrrev_i32_e32 v24, 31, v3
12; SDAG-NEXT:    v_ashrrev_i32_e32 v25, 31, v11
13; SDAG-NEXT:    s_mov_b64 s[10:11], 0x7f
14; SDAG-NEXT:    v_subb_u32_e32 v17, vcc, 0, v1, vcc
15; SDAG-NEXT:    v_mov_b32_e32 v26, v24
16; SDAG-NEXT:    v_mov_b32_e32 v27, v25
17; SDAG-NEXT:    v_subb_u32_e32 v19, vcc, 0, v2, vcc
18; SDAG-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
19; SDAG-NEXT:    v_cndmask_b32_e64 v21, v1, v17, s[4:5]
20; SDAG-NEXT:    v_cndmask_b32_e64 v20, v0, v16, s[4:5]
21; SDAG-NEXT:    v_subb_u32_e32 v0, vcc, 0, v3, vcc
22; SDAG-NEXT:    v_cndmask_b32_e64 v16, v2, v19, s[4:5]
23; SDAG-NEXT:    v_ffbh_u32_e32 v1, v20
24; SDAG-NEXT:    v_ffbh_u32_e32 v2, v21
25; SDAG-NEXT:    v_cndmask_b32_e64 v17, v3, v0, s[4:5]
26; SDAG-NEXT:    v_or_b32_e32 v0, v20, v16
27; SDAG-NEXT:    v_sub_i32_e32 v3, vcc, 0, v8
28; SDAG-NEXT:    v_add_i32_e64 v19, s[4:5], 32, v1
29; SDAG-NEXT:    v_ffbh_u32_e32 v22, v16
30; SDAG-NEXT:    v_or_b32_e32 v1, v21, v17
31; SDAG-NEXT:    v_subb_u32_e32 v23, vcc, 0, v9, vcc
32; SDAG-NEXT:    v_min_u32_e32 v2, v19, v2
33; SDAG-NEXT:    v_add_i32_e64 v19, s[4:5], 32, v22
34; SDAG-NEXT:    v_ffbh_u32_e32 v22, v17
35; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
36; SDAG-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[10:11]
37; SDAG-NEXT:    v_cndmask_b32_e64 v28, v9, v23, s[6:7]
38; SDAG-NEXT:    v_subb_u32_e32 v0, vcc, 0, v10, vcc
39; SDAG-NEXT:    v_cndmask_b32_e64 v29, v8, v3, s[6:7]
40; SDAG-NEXT:    v_min_u32_e32 v1, v19, v22
41; SDAG-NEXT:    v_add_i32_e64 v2, s[8:9], 64, v2
42; SDAG-NEXT:    v_addc_u32_e64 v3, s[8:9], 0, 0, s[8:9]
43; SDAG-NEXT:    v_subb_u32_e32 v8, vcc, 0, v11, vcc
44; SDAG-NEXT:    v_cndmask_b32_e64 v0, v10, v0, s[6:7]
45; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[16:17]
46; SDAG-NEXT:    v_cndmask_b32_e64 v9, v3, 0, vcc
47; SDAG-NEXT:    v_cndmask_b32_e32 v10, v2, v1, vcc
48; SDAG-NEXT:    v_ffbh_u32_e32 v3, v29
49; SDAG-NEXT:    v_ffbh_u32_e32 v19, v28
50; SDAG-NEXT:    v_cndmask_b32_e64 v1, v11, v8, s[6:7]
51; SDAG-NEXT:    v_or_b32_e32 v2, v29, v0
52; SDAG-NEXT:    v_add_i32_e32 v8, vcc, 32, v3
53; SDAG-NEXT:    v_ffbh_u32_e32 v11, v0
54; SDAG-NEXT:    v_or_b32_e32 v3, v28, v1
55; SDAG-NEXT:    v_min_u32_e32 v8, v8, v19
56; SDAG-NEXT:    v_add_i32_e32 v11, vcc, 32, v11
57; SDAG-NEXT:    v_ffbh_u32_e32 v19, v1
58; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
59; SDAG-NEXT:    v_min_u32_e32 v2, v11, v19
60; SDAG-NEXT:    v_add_i32_e64 v3, s[6:7], 64, v8
61; SDAG-NEXT:    v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7]
62; SDAG-NEXT:    v_cmp_ne_u64_e64 s[6:7], 0, v[0:1]
63; SDAG-NEXT:    v_cndmask_b32_e64 v8, v8, 0, s[6:7]
64; SDAG-NEXT:    v_cndmask_b32_e64 v2, v3, v2, s[6:7]
65; SDAG-NEXT:    s_or_b64 s[6:7], vcc, s[4:5]
66; SDAG-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
67; SDAG-NEXT:    v_subb_u32_e32 v3, vcc, v8, v9, vcc
68; SDAG-NEXT:    v_xor_b32_e32 v8, 0x7f, v2
69; SDAG-NEXT:    v_subbrev_u32_e32 v10, vcc, 0, v18, vcc
70; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[10:11], v[2:3]
71; SDAG-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
72; SDAG-NEXT:    v_subbrev_u32_e32 v11, vcc, 0, v18, vcc
73; SDAG-NEXT:    v_or_b32_e32 v8, v8, v10
74; SDAG-NEXT:    v_or_b32_e32 v9, v3, v11
75; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
76; SDAG-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
77; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[8:9]
78; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[10:11]
79; SDAG-NEXT:    v_cndmask_b32_e64 v8, v18, v19, s[4:5]
80; SDAG-NEXT:    v_and_b32_e32 v8, 1, v8
81; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 1, v8
82; SDAG-NEXT:    s_or_b64 s[4:5], s[6:7], s[4:5]
83; SDAG-NEXT:    v_cndmask_b32_e64 v18, v17, 0, s[4:5]
84; SDAG-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
85; SDAG-NEXT:    v_cndmask_b32_e64 v22, v16, 0, s[4:5]
86; SDAG-NEXT:    v_cndmask_b32_e64 v19, v21, 0, s[4:5]
87; SDAG-NEXT:    s_and_b64 s[8:9], s[6:7], vcc
88; SDAG-NEXT:    v_cndmask_b32_e64 v23, v20, 0, s[4:5]
89; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[8:9]
90; SDAG-NEXT:    s_cbranch_execz .LBB0_6
91; SDAG-NEXT:  ; %bb.1: ; %udiv-bb15
92; SDAG-NEXT:    v_add_i32_e32 v30, vcc, 1, v2
93; SDAG-NEXT:    v_sub_i32_e64 v18, s[4:5], 63, v2
94; SDAG-NEXT:    v_mov_b32_e32 v8, 0
95; SDAG-NEXT:    v_mov_b32_e32 v9, 0
96; SDAG-NEXT:    v_addc_u32_e32 v31, vcc, 0, v3, vcc
97; SDAG-NEXT:    v_lshl_b64 v[18:19], v[20:21], v18
98; SDAG-NEXT:    v_addc_u32_e32 v32, vcc, 0, v10, vcc
99; SDAG-NEXT:    v_addc_u32_e32 v33, vcc, 0, v11, vcc
100; SDAG-NEXT:    v_or_b32_e32 v10, v30, v32
101; SDAG-NEXT:    v_sub_i32_e32 v34, vcc, 0x7f, v2
102; SDAG-NEXT:    v_or_b32_e32 v11, v31, v33
103; SDAG-NEXT:    v_lshl_b64 v[2:3], v[16:17], v34
104; SDAG-NEXT:    v_sub_i32_e32 v35, vcc, 64, v34
105; SDAG-NEXT:    v_lshl_b64 v[22:23], v[20:21], v34
106; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
107; SDAG-NEXT:    v_lshr_b64 v[10:11], v[20:21], v35
108; SDAG-NEXT:    v_or_b32_e32 v3, v3, v11
109; SDAG-NEXT:    v_or_b32_e32 v2, v2, v10
110; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v34
111; SDAG-NEXT:    v_cndmask_b32_e64 v3, v19, v3, s[4:5]
112; SDAG-NEXT:    v_cndmask_b32_e64 v2, v18, v2, s[4:5]
113; SDAG-NEXT:    v_cndmask_b32_e64 v19, 0, v23, s[4:5]
114; SDAG-NEXT:    v_cndmask_b32_e64 v18, 0, v22, s[4:5]
115; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v34
116; SDAG-NEXT:    v_cndmask_b32_e64 v3, v3, v17, s[4:5]
117; SDAG-NEXT:    v_cndmask_b32_e64 v2, v2, v16, s[4:5]
118; SDAG-NEXT:    v_mov_b32_e32 v10, 0
119; SDAG-NEXT:    v_mov_b32_e32 v11, 0
120; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
121; SDAG-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
122; SDAG-NEXT:    s_cbranch_execz .LBB0_5
123; SDAG-NEXT:  ; %bb.2: ; %udiv-preheader4
124; SDAG-NEXT:    v_lshr_b64 v[8:9], v[20:21], v30
125; SDAG-NEXT:    v_sub_i32_e32 v10, vcc, 64, v30
126; SDAG-NEXT:    v_lshl_b64 v[10:11], v[16:17], v10
127; SDAG-NEXT:    v_or_b32_e32 v11, v9, v11
128; SDAG-NEXT:    v_or_b32_e32 v10, v8, v10
129; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v30
130; SDAG-NEXT:    v_subrev_i32_e64 v8, s[4:5], 64, v30
131; SDAG-NEXT:    v_lshr_b64 v[8:9], v[16:17], v8
132; SDAG-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
133; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v30
134; SDAG-NEXT:    v_cndmask_b32_e64 v21, v9, v21, s[4:5]
135; SDAG-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc
136; SDAG-NEXT:    v_cndmask_b32_e64 v20, v8, v20, s[4:5]
137; SDAG-NEXT:    v_lshr_b64 v[8:9], v[16:17], v30
138; SDAG-NEXT:    v_cndmask_b32_e32 v23, 0, v9, vcc
139; SDAG-NEXT:    v_cndmask_b32_e32 v22, 0, v8, vcc
140; SDAG-NEXT:    v_add_i32_e32 v34, vcc, -1, v29
141; SDAG-NEXT:    v_addc_u32_e32 v35, vcc, -1, v28, vcc
142; SDAG-NEXT:    v_addc_u32_e32 v36, vcc, -1, v0, vcc
143; SDAG-NEXT:    v_addc_u32_e32 v37, vcc, -1, v1, vcc
144; SDAG-NEXT:    s_mov_b64 s[4:5], 0
145; SDAG-NEXT:    v_mov_b32_e32 v16, 0
146; SDAG-NEXT:    v_mov_b32_e32 v17, 0
147; SDAG-NEXT:    v_mov_b32_e32 v10, 0
148; SDAG-NEXT:    v_mov_b32_e32 v11, 0
149; SDAG-NEXT:    v_mov_b32_e32 v9, 0
150; SDAG-NEXT:  .LBB0_3: ; %udiv-do-while3
151; SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
152; SDAG-NEXT:    v_lshrrev_b32_e32 v8, 31, v19
153; SDAG-NEXT:    v_lshl_b64 v[18:19], v[18:19], 1
154; SDAG-NEXT:    v_lshl_b64 v[22:23], v[22:23], 1
155; SDAG-NEXT:    v_lshrrev_b32_e32 v38, 31, v21
156; SDAG-NEXT:    v_lshl_b64 v[20:21], v[20:21], 1
157; SDAG-NEXT:    v_lshrrev_b32_e32 v39, 31, v3
158; SDAG-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
159; SDAG-NEXT:    v_or_b32_e32 v19, v17, v19
160; SDAG-NEXT:    v_or_b32_e32 v18, v16, v18
161; SDAG-NEXT:    v_or_b32_e32 v16, v22, v38
162; SDAG-NEXT:    v_or_b32_e32 v17, v20, v39
163; SDAG-NEXT:    v_or_b32_e32 v2, v2, v8
164; SDAG-NEXT:    v_sub_i32_e32 v8, vcc, v34, v17
165; SDAG-NEXT:    v_subb_u32_e32 v8, vcc, v35, v21, vcc
166; SDAG-NEXT:    v_subb_u32_e32 v8, vcc, v36, v16, vcc
167; SDAG-NEXT:    v_subb_u32_e32 v8, vcc, v37, v23, vcc
168; SDAG-NEXT:    v_ashrrev_i32_e32 v8, 31, v8
169; SDAG-NEXT:    v_and_b32_e32 v20, v8, v29
170; SDAG-NEXT:    v_and_b32_e32 v22, v8, v28
171; SDAG-NEXT:    v_and_b32_e32 v38, v8, v0
172; SDAG-NEXT:    v_and_b32_e32 v39, v8, v1
173; SDAG-NEXT:    v_and_b32_e32 v8, 1, v8
174; SDAG-NEXT:    v_sub_i32_e32 v20, vcc, v17, v20
175; SDAG-NEXT:    v_subb_u32_e32 v21, vcc, v21, v22, vcc
176; SDAG-NEXT:    v_subb_u32_e32 v22, vcc, v16, v38, vcc
177; SDAG-NEXT:    v_subb_u32_e32 v23, vcc, v23, v39, vcc
178; SDAG-NEXT:    v_add_i32_e32 v30, vcc, -1, v30
179; SDAG-NEXT:    v_addc_u32_e32 v31, vcc, -1, v31, vcc
180; SDAG-NEXT:    v_addc_u32_e32 v32, vcc, -1, v32, vcc
181; SDAG-NEXT:    v_addc_u32_e32 v33, vcc, -1, v33, vcc
182; SDAG-NEXT:    v_or_b32_e32 v16, v30, v32
183; SDAG-NEXT:    v_or_b32_e32 v17, v31, v33
184; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[16:17]
185; SDAG-NEXT:    v_or_b32_e32 v3, v11, v3
186; SDAG-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
187; SDAG-NEXT:    v_or_b32_e32 v2, v10, v2
188; SDAG-NEXT:    v_mov_b32_e32 v17, v9
189; SDAG-NEXT:    v_mov_b32_e32 v16, v8
190; SDAG-NEXT:    s_andn2_b64 exec, exec, s[4:5]
191; SDAG-NEXT:    s_cbranch_execnz .LBB0_3
192; SDAG-NEXT:  ; %bb.4: ; %Flow13
193; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
194; SDAG-NEXT:  .LBB0_5: ; %Flow14
195; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
196; SDAG-NEXT:    v_lshl_b64 v[0:1], v[2:3], 1
197; SDAG-NEXT:    v_lshrrev_b32_e32 v16, 31, v19
198; SDAG-NEXT:    v_lshl_b64 v[2:3], v[18:19], 1
199; SDAG-NEXT:    v_or_b32_e32 v0, v0, v16
200; SDAG-NEXT:    v_or_b32_e32 v18, v11, v1
201; SDAG-NEXT:    v_or_b32_e32 v19, v9, v3
202; SDAG-NEXT:    v_or_b32_e32 v22, v10, v0
203; SDAG-NEXT:    v_or_b32_e32 v23, v8, v2
204; SDAG-NEXT:  .LBB0_6: ; %Flow16
205; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
206; SDAG-NEXT:    v_ashrrev_i32_e32 v16, 31, v7
207; SDAG-NEXT:    v_ashrrev_i32_e32 v17, 31, v15
208; SDAG-NEXT:    v_sub_i32_e32 v0, vcc, 0, v4
209; SDAG-NEXT:    v_mov_b32_e32 v8, 0
210; SDAG-NEXT:    s_mov_b64 s[10:11], 0x7f
211; SDAG-NEXT:    v_mov_b32_e32 v20, v16
212; SDAG-NEXT:    v_mov_b32_e32 v21, v17
213; SDAG-NEXT:    v_subb_u32_e32 v1, vcc, 0, v5, vcc
214; SDAG-NEXT:    v_subb_u32_e32 v9, vcc, 0, v6, vcc
215; SDAG-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[6:7]
216; SDAG-NEXT:    v_cndmask_b32_e64 v3, v5, v1, s[4:5]
217; SDAG-NEXT:    v_cndmask_b32_e64 v2, v4, v0, s[4:5]
218; SDAG-NEXT:    v_subb_u32_e32 v0, vcc, 0, v7, vcc
219; SDAG-NEXT:    v_cndmask_b32_e64 v6, v6, v9, s[4:5]
220; SDAG-NEXT:    v_ffbh_u32_e32 v1, v2
221; SDAG-NEXT:    v_ffbh_u32_e32 v4, v3
222; SDAG-NEXT:    v_cndmask_b32_e64 v7, v7, v0, s[4:5]
223; SDAG-NEXT:    v_sub_i32_e32 v5, vcc, 0, v12
224; SDAG-NEXT:    v_or_b32_e32 v0, v2, v6
225; SDAG-NEXT:    v_ffbh_u32_e32 v9, v6
226; SDAG-NEXT:    v_add_i32_e64 v10, s[4:5], 32, v1
227; SDAG-NEXT:    v_subb_u32_e32 v11, vcc, 0, v13, vcc
228; SDAG-NEXT:    v_or_b32_e32 v1, v3, v7
229; SDAG-NEXT:    v_add_i32_e64 v9, s[4:5], 32, v9
230; SDAG-NEXT:    v_ffbh_u32_e32 v30, v7
231; SDAG-NEXT:    v_min_u32_e32 v4, v10, v4
232; SDAG-NEXT:    v_subb_u32_e32 v10, vcc, 0, v14, vcc
233; SDAG-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[14:15]
234; SDAG-NEXT:    v_cndmask_b32_e64 v28, v13, v11, s[4:5]
235; SDAG-NEXT:    v_cndmask_b32_e64 v29, v12, v5, s[4:5]
236; SDAG-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[0:1]
237; SDAG-NEXT:    v_min_u32_e32 v1, v9, v30
238; SDAG-NEXT:    v_add_i32_e64 v4, s[8:9], 64, v4
239; SDAG-NEXT:    v_addc_u32_e64 v5, s[8:9], 0, 0, s[8:9]
240; SDAG-NEXT:    v_subb_u32_e32 v9, vcc, 0, v15, vcc
241; SDAG-NEXT:    v_cndmask_b32_e64 v0, v14, v10, s[4:5]
242; SDAG-NEXT:    v_ffbh_u32_e32 v10, v29
243; SDAG-NEXT:    v_ffbh_u32_e32 v11, v28
244; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
245; SDAG-NEXT:    v_cndmask_b32_e64 v12, v5, 0, vcc
246; SDAG-NEXT:    v_cndmask_b32_e32 v13, v4, v1, vcc
247; SDAG-NEXT:    v_cndmask_b32_e64 v1, v15, v9, s[4:5]
248; SDAG-NEXT:    v_or_b32_e32 v4, v29, v0
249; SDAG-NEXT:    v_ffbh_u32_e32 v9, v0
250; SDAG-NEXT:    v_add_i32_e32 v10, vcc, 32, v10
251; SDAG-NEXT:    v_or_b32_e32 v5, v28, v1
252; SDAG-NEXT:    v_add_i32_e32 v9, vcc, 32, v9
253; SDAG-NEXT:    v_ffbh_u32_e32 v14, v1
254; SDAG-NEXT:    v_min_u32_e32 v10, v10, v11
255; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[4:5]
256; SDAG-NEXT:    v_min_u32_e32 v4, v9, v14
257; SDAG-NEXT:    v_add_i32_e64 v5, s[4:5], 64, v10
258; SDAG-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5]
259; SDAG-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
260; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
261; SDAG-NEXT:    v_cndmask_b32_e64 v9, v9, 0, vcc
262; SDAG-NEXT:    v_cndmask_b32_e32 v4, v5, v4, vcc
263; SDAG-NEXT:    v_sub_i32_e32 v4, vcc, v4, v13
264; SDAG-NEXT:    v_subb_u32_e32 v5, vcc, v9, v12, vcc
265; SDAG-NEXT:    v_xor_b32_e32 v9, 0x7f, v4
266; SDAG-NEXT:    v_subbrev_u32_e32 v10, vcc, 0, v8, vcc
267; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[10:11], v[4:5]
268; SDAG-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
269; SDAG-NEXT:    v_subbrev_u32_e32 v11, vcc, 0, v8, vcc
270; SDAG-NEXT:    v_or_b32_e32 v8, v9, v10
271; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
272; SDAG-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
273; SDAG-NEXT:    v_or_b32_e32 v9, v5, v11
274; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
275; SDAG-NEXT:    v_cndmask_b32_e32 v12, v13, v12, vcc
276; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[8:9]
277; SDAG-NEXT:    v_and_b32_e32 v8, 1, v12
278; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 1, v8
279; SDAG-NEXT:    s_or_b64 s[4:5], s[6:7], s[4:5]
280; SDAG-NEXT:    v_cndmask_b32_e64 v13, v7, 0, s[4:5]
281; SDAG-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
282; SDAG-NEXT:    v_cndmask_b32_e64 v9, v6, 0, s[4:5]
283; SDAG-NEXT:    v_cndmask_b32_e64 v14, v3, 0, s[4:5]
284; SDAG-NEXT:    v_cndmask_b32_e64 v8, v2, 0, s[4:5]
285; SDAG-NEXT:    s_and_b64 s[4:5], s[6:7], vcc
286; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
287; SDAG-NEXT:    s_cbranch_execz .LBB0_12
288; SDAG-NEXT:  ; %bb.7: ; %udiv-bb1
289; SDAG-NEXT:    v_add_i32_e32 v30, vcc, 1, v4
290; SDAG-NEXT:    v_sub_i32_e64 v12, s[4:5], 63, v4
291; SDAG-NEXT:    v_mov_b32_e32 v8, 0
292; SDAG-NEXT:    v_mov_b32_e32 v9, 0
293; SDAG-NEXT:    v_addc_u32_e32 v31, vcc, 0, v5, vcc
294; SDAG-NEXT:    v_lshl_b64 v[12:13], v[2:3], v12
295; SDAG-NEXT:    v_addc_u32_e32 v32, vcc, 0, v10, vcc
296; SDAG-NEXT:    v_addc_u32_e32 v33, vcc, 0, v11, vcc
297; SDAG-NEXT:    v_or_b32_e32 v10, v30, v32
298; SDAG-NEXT:    v_sub_i32_e32 v34, vcc, 0x7f, v4
299; SDAG-NEXT:    v_or_b32_e32 v11, v31, v33
300; SDAG-NEXT:    v_lshl_b64 v[4:5], v[6:7], v34
301; SDAG-NEXT:    v_sub_i32_e32 v35, vcc, 64, v34
302; SDAG-NEXT:    v_lshl_b64 v[14:15], v[2:3], v34
303; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
304; SDAG-NEXT:    v_lshr_b64 v[10:11], v[2:3], v35
305; SDAG-NEXT:    v_or_b32_e32 v5, v5, v11
306; SDAG-NEXT:    v_or_b32_e32 v4, v4, v10
307; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v34
308; SDAG-NEXT:    v_cndmask_b32_e64 v5, v13, v5, s[4:5]
309; SDAG-NEXT:    v_cndmask_b32_e64 v4, v12, v4, s[4:5]
310; SDAG-NEXT:    v_cndmask_b32_e64 v11, 0, v15, s[4:5]
311; SDAG-NEXT:    v_cndmask_b32_e64 v10, 0, v14, s[4:5]
312; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v34
313; SDAG-NEXT:    v_cndmask_b32_e64 v5, v5, v7, s[4:5]
314; SDAG-NEXT:    v_cndmask_b32_e64 v4, v4, v6, s[4:5]
315; SDAG-NEXT:    v_mov_b32_e32 v12, 0
316; SDAG-NEXT:    v_mov_b32_e32 v13, 0
317; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
318; SDAG-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
319; SDAG-NEXT:    s_cbranch_execz .LBB0_11
320; SDAG-NEXT:  ; %bb.8: ; %udiv-preheader
321; SDAG-NEXT:    v_lshr_b64 v[8:9], v[2:3], v30
322; SDAG-NEXT:    v_sub_i32_e32 v35, vcc, 64, v30
323; SDAG-NEXT:    v_subrev_i32_e32 v36, vcc, 64, v30
324; SDAG-NEXT:    v_lshr_b64 v[37:38], v[6:7], v30
325; SDAG-NEXT:    v_add_i32_e32 v34, vcc, -1, v29
326; SDAG-NEXT:    s_mov_b64 s[10:11], 0
327; SDAG-NEXT:    v_mov_b32_e32 v14, 0
328; SDAG-NEXT:    v_mov_b32_e32 v15, 0
329; SDAG-NEXT:    v_mov_b32_e32 v12, 0
330; SDAG-NEXT:    v_mov_b32_e32 v13, 0
331; SDAG-NEXT:    v_lshl_b64 v[48:49], v[6:7], v35
332; SDAG-NEXT:    v_lshr_b64 v[6:7], v[6:7], v36
333; SDAG-NEXT:    v_addc_u32_e32 v35, vcc, -1, v28, vcc
334; SDAG-NEXT:    v_or_b32_e32 v9, v9, v49
335; SDAG-NEXT:    v_or_b32_e32 v8, v8, v48
336; SDAG-NEXT:    v_addc_u32_e32 v36, vcc, -1, v0, vcc
337; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v30
338; SDAG-NEXT:    v_cndmask_b32_e64 v9, v7, v9, s[4:5]
339; SDAG-NEXT:    v_cndmask_b32_e64 v8, v6, v8, s[4:5]
340; SDAG-NEXT:    v_cndmask_b32_e64 v7, 0, v38, s[4:5]
341; SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, v37, s[4:5]
342; SDAG-NEXT:    v_addc_u32_e32 v37, vcc, -1, v1, vcc
343; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v30
344; SDAG-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
345; SDAG-NEXT:    v_cndmask_b32_e32 v2, v8, v2, vcc
346; SDAG-NEXT:    v_mov_b32_e32 v9, 0
347; SDAG-NEXT:  .LBB0_9: ; %udiv-do-while
348; SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
349; SDAG-NEXT:    v_lshl_b64 v[6:7], v[6:7], 1
350; SDAG-NEXT:    v_lshrrev_b32_e32 v8, 31, v3
351; SDAG-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
352; SDAG-NEXT:    v_lshrrev_b32_e32 v38, 31, v5
353; SDAG-NEXT:    v_lshl_b64 v[4:5], v[4:5], 1
354; SDAG-NEXT:    v_lshrrev_b32_e32 v39, 31, v11
355; SDAG-NEXT:    v_lshl_b64 v[10:11], v[10:11], 1
356; SDAG-NEXT:    v_or_b32_e32 v6, v6, v8
357; SDAG-NEXT:    v_or_b32_e32 v2, v2, v38
358; SDAG-NEXT:    v_or_b32_e32 v4, v4, v39
359; SDAG-NEXT:    v_or_b32_e32 v5, v13, v5
360; SDAG-NEXT:    v_or_b32_e32 v11, v15, v11
361; SDAG-NEXT:    v_sub_i32_e32 v8, vcc, v34, v2
362; SDAG-NEXT:    v_or_b32_e32 v4, v12, v4
363; SDAG-NEXT:    v_subb_u32_e32 v8, vcc, v35, v3, vcc
364; SDAG-NEXT:    v_subb_u32_e32 v8, vcc, v36, v6, vcc
365; SDAG-NEXT:    v_subb_u32_e32 v8, vcc, v37, v7, vcc
366; SDAG-NEXT:    v_ashrrev_i32_e32 v8, 31, v8
367; SDAG-NEXT:    v_and_b32_e32 v15, v8, v29
368; SDAG-NEXT:    v_and_b32_e32 v38, v8, v28
369; SDAG-NEXT:    v_and_b32_e32 v39, v8, v0
370; SDAG-NEXT:    v_and_b32_e32 v48, v8, v1
371; SDAG-NEXT:    v_sub_i32_e32 v2, vcc, v2, v15
372; SDAG-NEXT:    v_subb_u32_e32 v3, vcc, v3, v38, vcc
373; SDAG-NEXT:    v_subb_u32_e32 v6, vcc, v6, v39, vcc
374; SDAG-NEXT:    v_subb_u32_e32 v7, vcc, v7, v48, vcc
375; SDAG-NEXT:    v_add_i32_e32 v30, vcc, -1, v30
376; SDAG-NEXT:    v_addc_u32_e32 v31, vcc, -1, v31, vcc
377; SDAG-NEXT:    v_addc_u32_e32 v32, vcc, -1, v32, vcc
378; SDAG-NEXT:    v_addc_u32_e32 v33, vcc, -1, v33, vcc
379; SDAG-NEXT:    v_or_b32_e32 v38, v30, v32
380; SDAG-NEXT:    v_or_b32_e32 v39, v31, v33
381; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[38:39]
382; SDAG-NEXT:    v_and_b32_e32 v8, 1, v8
383; SDAG-NEXT:    s_or_b64 s[10:11], vcc, s[10:11]
384; SDAG-NEXT:    v_or_b32_e32 v10, v14, v10
385; SDAG-NEXT:    v_mov_b32_e32 v15, v9
386; SDAG-NEXT:    v_mov_b32_e32 v14, v8
387; SDAG-NEXT:    s_andn2_b64 exec, exec, s[10:11]
388; SDAG-NEXT:    s_cbranch_execnz .LBB0_9
389; SDAG-NEXT:  ; %bb.10: ; %Flow
390; SDAG-NEXT:    s_or_b64 exec, exec, s[10:11]
391; SDAG-NEXT:  .LBB0_11: ; %Flow11
392; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
393; SDAG-NEXT:    v_lshl_b64 v[0:1], v[4:5], 1
394; SDAG-NEXT:    v_lshrrev_b32_e32 v4, 31, v11
395; SDAG-NEXT:    v_lshl_b64 v[2:3], v[10:11], 1
396; SDAG-NEXT:    v_or_b32_e32 v0, v0, v4
397; SDAG-NEXT:    v_or_b32_e32 v13, v13, v1
398; SDAG-NEXT:    v_or_b32_e32 v14, v9, v3
399; SDAG-NEXT:    v_or_b32_e32 v9, v12, v0
400; SDAG-NEXT:    v_or_b32_e32 v8, v8, v2
401; SDAG-NEXT:  .LBB0_12: ; %Flow12
402; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
403; SDAG-NEXT:    v_xor_b32_e32 v3, v27, v26
404; SDAG-NEXT:    v_xor_b32_e32 v2, v25, v24
405; SDAG-NEXT:    v_xor_b32_e32 v7, v21, v20
406; SDAG-NEXT:    v_xor_b32_e32 v6, v17, v16
407; SDAG-NEXT:    v_xor_b32_e32 v4, v18, v3
408; SDAG-NEXT:    v_xor_b32_e32 v5, v22, v2
409; SDAG-NEXT:    v_xor_b32_e32 v1, v19, v3
410; SDAG-NEXT:    v_xor_b32_e32 v0, v23, v2
411; SDAG-NEXT:    v_xor_b32_e32 v10, v13, v7
412; SDAG-NEXT:    v_xor_b32_e32 v9, v9, v6
413; SDAG-NEXT:    v_xor_b32_e32 v11, v14, v7
414; SDAG-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
415; SDAG-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
416; SDAG-NEXT:    v_subb_u32_e32 v2, vcc, v5, v2, vcc
417; SDAG-NEXT:    v_subb_u32_e32 v3, vcc, v4, v3, vcc
418; SDAG-NEXT:    v_xor_b32_e32 v4, v8, v6
419; SDAG-NEXT:    v_sub_i32_e32 v4, vcc, v4, v6
420; SDAG-NEXT:    v_subb_u32_e32 v5, vcc, v11, v7, vcc
421; SDAG-NEXT:    v_subb_u32_e32 v6, vcc, v9, v6, vcc
422; SDAG-NEXT:    v_subb_u32_e32 v7, vcc, v10, v7, vcc
423; SDAG-NEXT:    s_setpc_b64 s[30:31]
424;
425; GISEL-LABEL: v_sdiv_v2i128_vv:
426; GISEL:       ; %bb.0: ; %_udiv-special-cases_udiv-special-cases
427; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
428; GISEL-NEXT:    v_ashrrev_i32_e32 v24, 31, v3
429; GISEL-NEXT:    v_ashrrev_i32_e32 v25, 31, v11
430; GISEL-NEXT:    v_mov_b32_e32 v16, 0x7f
431; GISEL-NEXT:    v_mov_b32_e32 v17, 0
432; GISEL-NEXT:    s_mov_b64 s[8:9], 0
433; GISEL-NEXT:    v_xor_b32_e32 v0, v24, v0
434; GISEL-NEXT:    v_xor_b32_e32 v1, v24, v1
435; GISEL-NEXT:    v_xor_b32_e32 v2, v24, v2
436; GISEL-NEXT:    v_xor_b32_e32 v3, v24, v3
437; GISEL-NEXT:    v_xor_b32_e32 v8, v25, v8
438; GISEL-NEXT:    v_xor_b32_e32 v9, v25, v9
439; GISEL-NEXT:    v_xor_b32_e32 v10, v25, v10
440; GISEL-NEXT:    v_xor_b32_e32 v11, v25, v11
441; GISEL-NEXT:    v_sub_i32_e32 v18, vcc, v0, v24
442; GISEL-NEXT:    v_subb_u32_e32 v19, vcc, v1, v24, vcc
443; GISEL-NEXT:    v_sub_i32_e64 v26, s[4:5], v8, v25
444; GISEL-NEXT:    v_subb_u32_e64 v27, s[4:5], v9, v25, s[4:5]
445; GISEL-NEXT:    v_subb_u32_e32 v20, vcc, v2, v24, vcc
446; GISEL-NEXT:    v_subb_u32_e32 v21, vcc, v3, v24, vcc
447; GISEL-NEXT:    v_subb_u32_e64 v10, vcc, v10, v25, s[4:5]
448; GISEL-NEXT:    v_subb_u32_e32 v11, vcc, v11, v25, vcc
449; GISEL-NEXT:    v_ffbh_u32_e32 v8, v27
450; GISEL-NEXT:    v_ffbh_u32_e32 v9, v26
451; GISEL-NEXT:    v_ffbh_u32_e32 v22, v18
452; GISEL-NEXT:    v_ffbh_u32_e32 v23, v19
453; GISEL-NEXT:    v_or_b32_e32 v0, v26, v10
454; GISEL-NEXT:    v_or_b32_e32 v1, v27, v11
455; GISEL-NEXT:    v_or_b32_e32 v2, v18, v20
456; GISEL-NEXT:    v_or_b32_e32 v3, v19, v21
457; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 32, v9
458; GISEL-NEXT:    v_add_i32_e32 v22, vcc, 32, v22
459; GISEL-NEXT:    v_ffbh_u32_e32 v28, v10
460; GISEL-NEXT:    v_ffbh_u32_e32 v29, v11
461; GISEL-NEXT:    v_ffbh_u32_e32 v30, v20
462; GISEL-NEXT:    v_ffbh_u32_e32 v31, v21
463; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
464; GISEL-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[2:3]
465; GISEL-NEXT:    v_min_u32_e32 v0, v8, v9
466; GISEL-NEXT:    v_min_u32_e32 v1, v23, v22
467; GISEL-NEXT:    v_add_i32_e64 v2, s[6:7], 32, v28
468; GISEL-NEXT:    v_add_i32_e64 v3, s[6:7], 32, v30
469; GISEL-NEXT:    v_min_u32_e32 v2, v29, v2
470; GISEL-NEXT:    v_min_u32_e32 v3, v31, v3
471; GISEL-NEXT:    v_add_i32_e64 v0, s[6:7], 64, v0
472; GISEL-NEXT:    v_add_i32_e64 v1, s[6:7], 64, v1
473; GISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
474; GISEL-NEXT:    v_cndmask_b32_e64 v22, 0, 1, s[4:5]
475; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
476; GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
477; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[20:21]
478; GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
479; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
480; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], 0, 0, vcc
481; GISEL-NEXT:    v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
482; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
483; GISEL-NEXT:    v_xor_b32_e32 v8, 0x7f, v2
484; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[2:3], v[16:17]
485; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
486; GISEL-NEXT:    v_or_b32_e32 v8, v8, v0
487; GISEL-NEXT:    v_or_b32_e32 v9, v3, v1
488; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[0:1]
489; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
490; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
491; GISEL-NEXT:    v_cndmask_b32_e32 v16, v17, v16, vcc
492; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[8:9]
493; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
494; GISEL-NEXT:    v_or_b32_e32 v9, v22, v16
495; GISEL-NEXT:    v_or_b32_e32 v8, v9, v8
496; GISEL-NEXT:    v_and_b32_e32 v9, 1, v9
497; GISEL-NEXT:    v_and_b32_e32 v8, 1, v8
498; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
499; GISEL-NEXT:    v_cndmask_b32_e64 v22, v18, 0, vcc
500; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v8
501; GISEL-NEXT:    v_cndmask_b32_e64 v8, v20, 0, vcc
502; GISEL-NEXT:    v_cndmask_b32_e64 v9, v21, 0, vcc
503; GISEL-NEXT:    s_xor_b64 s[4:5], s[4:5], -1
504; GISEL-NEXT:    v_cndmask_b32_e64 v23, v19, 0, vcc
505; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
506; GISEL-NEXT:    s_cbranch_execz .LBB0_6
507; GISEL-NEXT:  ; %bb.1: ; %udiv-bb15
508; GISEL-NEXT:    v_add_i32_e32 v28, vcc, 1, v2
509; GISEL-NEXT:    v_addc_u32_e64 v29, s[4:5], 0, v3, vcc
510; GISEL-NEXT:    v_sub_i32_e32 v32, vcc, 0x7f, v2
511; GISEL-NEXT:    v_not_b32_e32 v2, 63
512; GISEL-NEXT:    v_addc_u32_e64 v30, vcc, 0, v0, s[4:5]
513; GISEL-NEXT:    v_addc_u32_e32 v31, vcc, 0, v1, vcc
514; GISEL-NEXT:    v_add_i32_e64 v16, s[4:5], v32, v2
515; GISEL-NEXT:    v_sub_i32_e64 v8, s[4:5], 64, v32
516; GISEL-NEXT:    v_lshl_b64 v[0:1], v[18:19], v32
517; GISEL-NEXT:    v_lshl_b64 v[2:3], v[20:21], v32
518; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
519; GISEL-NEXT:    v_lshr_b64 v[8:9], v[18:19], v8
520; GISEL-NEXT:    v_lshl_b64 v[22:23], v[18:19], v16
521; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v32
522; GISEL-NEXT:    v_cndmask_b32_e32 v16, 0, v0, vcc
523; GISEL-NEXT:    v_cndmask_b32_e32 v17, 0, v1, vcc
524; GISEL-NEXT:    v_or_b32_e32 v0, v8, v2
525; GISEL-NEXT:    v_or_b32_e32 v1, v9, v3
526; GISEL-NEXT:    v_cndmask_b32_e32 v0, v22, v0, vcc
527; GISEL-NEXT:    v_cndmask_b32_e32 v1, v23, v1, vcc
528; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v32
529; GISEL-NEXT:    v_cndmask_b32_e32 v8, v0, v20, vcc
530; GISEL-NEXT:    v_cndmask_b32_e32 v9, v1, v21, vcc
531; GISEL-NEXT:    s_mov_b64 s[10:11], s[8:9]
532; GISEL-NEXT:    v_mov_b32_e32 v0, s8
533; GISEL-NEXT:    v_mov_b32_e32 v1, s9
534; GISEL-NEXT:    v_mov_b32_e32 v2, s10
535; GISEL-NEXT:    v_mov_b32_e32 v3, s11
536; GISEL-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
537; GISEL-NEXT:    s_xor_b64 s[12:13], exec, s[8:9]
538; GISEL-NEXT:    s_cbranch_execz .LBB0_5
539; GISEL-NEXT:  ; %bb.2: ; %udiv-preheader4
540; GISEL-NEXT:    v_add_i32_e32 v32, vcc, 0xffffffc0, v28
541; GISEL-NEXT:    v_sub_i32_e32 v22, vcc, 64, v28
542; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v28
543; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v28
544; GISEL-NEXT:    v_lshr_b64 v[0:1], v[20:21], v28
545; GISEL-NEXT:    v_lshr_b64 v[2:3], v[18:19], v28
546; GISEL-NEXT:    v_lshl_b64 v[22:23], v[20:21], v22
547; GISEL-NEXT:    v_or_b32_e32 v22, v2, v22
548; GISEL-NEXT:    v_or_b32_e32 v23, v3, v23
549; GISEL-NEXT:    s_mov_b64 s[8:9], 0
550; GISEL-NEXT:    v_lshr_b64 v[2:3], v[20:21], v32
551; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v22, vcc
552; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v23, vcc
553; GISEL-NEXT:    v_cndmask_b32_e64 v18, v2, v18, s[4:5]
554; GISEL-NEXT:    v_cndmask_b32_e64 v19, v3, v19, s[4:5]
555; GISEL-NEXT:    v_cndmask_b32_e32 v20, 0, v0, vcc
556; GISEL-NEXT:    v_cndmask_b32_e32 v21, 0, v1, vcc
557; GISEL-NEXT:    v_add_i32_e32 v32, vcc, -1, v26
558; GISEL-NEXT:    v_addc_u32_e32 v33, vcc, -1, v27, vcc
559; GISEL-NEXT:    v_addc_u32_e32 v34, vcc, -1, v10, vcc
560; GISEL-NEXT:    v_addc_u32_e32 v35, vcc, -1, v11, vcc
561; GISEL-NEXT:    s_mov_b64 s[10:11], s[8:9]
562; GISEL-NEXT:    v_mov_b32_e32 v23, 0
563; GISEL-NEXT:    v_mov_b32_e32 v0, s8
564; GISEL-NEXT:    v_mov_b32_e32 v1, s9
565; GISEL-NEXT:    v_mov_b32_e32 v2, s10
566; GISEL-NEXT:    v_mov_b32_e32 v3, s11
567; GISEL-NEXT:  .LBB0_3: ; %udiv-do-while3
568; GISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
569; GISEL-NEXT:    v_lshrrev_b32_e32 v36, 31, v17
570; GISEL-NEXT:    v_lshl_b64 v[2:3], v[16:17], 1
571; GISEL-NEXT:    v_or_b32_e32 v16, v0, v2
572; GISEL-NEXT:    v_or_b32_e32 v17, v1, v3
573; GISEL-NEXT:    v_lshrrev_b32_e32 v22, 31, v19
574; GISEL-NEXT:    v_lshl_b64 v[0:1], v[18:19], 1
575; GISEL-NEXT:    v_lshl_b64 v[2:3], v[20:21], 1
576; GISEL-NEXT:    v_or_b32_e32 v2, v2, v22
577; GISEL-NEXT:    v_lshrrev_b32_e32 v18, 31, v9
578; GISEL-NEXT:    v_or_b32_e32 v0, v0, v18
579; GISEL-NEXT:    v_sub_i32_e32 v18, vcc, v32, v0
580; GISEL-NEXT:    v_subb_u32_e32 v18, vcc, v33, v1, vcc
581; GISEL-NEXT:    v_subb_u32_e32 v18, vcc, v34, v2, vcc
582; GISEL-NEXT:    v_subb_u32_e32 v18, vcc, v35, v3, vcc
583; GISEL-NEXT:    v_ashrrev_i32_e32 v22, 31, v18
584; GISEL-NEXT:    v_and_b32_e32 v18, v22, v26
585; GISEL-NEXT:    v_sub_i32_e32 v18, vcc, v0, v18
586; GISEL-NEXT:    v_and_b32_e32 v0, v22, v27
587; GISEL-NEXT:    v_subb_u32_e32 v19, vcc, v1, v0, vcc
588; GISEL-NEXT:    v_and_b32_e32 v0, v22, v10
589; GISEL-NEXT:    v_subb_u32_e32 v20, vcc, v2, v0, vcc
590; GISEL-NEXT:    v_and_b32_e32 v0, v22, v11
591; GISEL-NEXT:    v_subb_u32_e32 v21, vcc, v3, v0, vcc
592; GISEL-NEXT:    v_add_i32_e32 v28, vcc, -1, v28
593; GISEL-NEXT:    v_addc_u32_e32 v29, vcc, -1, v29, vcc
594; GISEL-NEXT:    v_addc_u32_e32 v30, vcc, -1, v30, vcc
595; GISEL-NEXT:    v_addc_u32_e32 v31, vcc, -1, v31, vcc
596; GISEL-NEXT:    v_or_b32_e32 v0, v28, v30
597; GISEL-NEXT:    v_or_b32_e32 v1, v29, v31
598; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
599; GISEL-NEXT:    v_and_b32_e32 v22, 1, v22
600; GISEL-NEXT:    v_lshl_b64 v[8:9], v[8:9], 1
601; GISEL-NEXT:    s_or_b64 s[8:9], vcc, s[8:9]
602; GISEL-NEXT:    v_or_b32_e32 v8, v8, v36
603; GISEL-NEXT:    v_mov_b32_e32 v0, v22
604; GISEL-NEXT:    v_mov_b32_e32 v1, v23
605; GISEL-NEXT:    s_andn2_b64 exec, exec, s[8:9]
606; GISEL-NEXT:    s_cbranch_execnz .LBB0_3
607; GISEL-NEXT:  ; %bb.4: ; %Flow13
608; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
609; GISEL-NEXT:  .LBB0_5: ; %Flow14
610; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
611; GISEL-NEXT:    v_lshl_b64 v[2:3], v[16:17], 1
612; GISEL-NEXT:    v_lshl_b64 v[8:9], v[8:9], 1
613; GISEL-NEXT:    v_lshrrev_b32_e32 v10, 31, v17
614; GISEL-NEXT:    v_or_b32_e32 v8, v8, v10
615; GISEL-NEXT:    v_or_b32_e32 v22, v0, v2
616; GISEL-NEXT:    v_or_b32_e32 v23, v1, v3
617; GISEL-NEXT:  .LBB0_6: ; %Flow16
618; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
619; GISEL-NEXT:    s_mov_b64 s[8:9], 0
620; GISEL-NEXT:    v_ashrrev_i32_e32 v18, 31, v7
621; GISEL-NEXT:    v_ashrrev_i32_e32 v19, 31, v15
622; GISEL-NEXT:    v_mov_b32_e32 v10, 0x7f
623; GISEL-NEXT:    v_mov_b32_e32 v11, 0
624; GISEL-NEXT:    v_xor_b32_e32 v0, v18, v4
625; GISEL-NEXT:    v_xor_b32_e32 v1, v18, v5
626; GISEL-NEXT:    v_xor_b32_e32 v2, v18, v6
627; GISEL-NEXT:    v_xor_b32_e32 v3, v18, v7
628; GISEL-NEXT:    v_xor_b32_e32 v4, v19, v12
629; GISEL-NEXT:    v_xor_b32_e32 v5, v19, v13
630; GISEL-NEXT:    v_xor_b32_e32 v14, v19, v14
631; GISEL-NEXT:    v_xor_b32_e32 v15, v19, v15
632; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v0, v18
633; GISEL-NEXT:    v_subb_u32_e32 v7, vcc, v1, v18, vcc
634; GISEL-NEXT:    v_sub_i32_e64 v20, s[4:5], v4, v19
635; GISEL-NEXT:    v_subb_u32_e64 v21, s[4:5], v5, v19, s[4:5]
636; GISEL-NEXT:    v_subb_u32_e32 v12, vcc, v2, v18, vcc
637; GISEL-NEXT:    v_subb_u32_e32 v13, vcc, v3, v18, vcc
638; GISEL-NEXT:    v_subb_u32_e64 v4, vcc, v14, v19, s[4:5]
639; GISEL-NEXT:    v_subb_u32_e32 v5, vcc, v15, v19, vcc
640; GISEL-NEXT:    v_ffbh_u32_e32 v14, v21
641; GISEL-NEXT:    v_ffbh_u32_e32 v15, v20
642; GISEL-NEXT:    v_ffbh_u32_e32 v16, v7
643; GISEL-NEXT:    v_ffbh_u32_e32 v17, v6
644; GISEL-NEXT:    v_or_b32_e32 v0, v20, v4
645; GISEL-NEXT:    v_or_b32_e32 v1, v21, v5
646; GISEL-NEXT:    v_or_b32_e32 v2, v6, v12
647; GISEL-NEXT:    v_or_b32_e32 v3, v7, v13
648; GISEL-NEXT:    v_add_i32_e32 v15, vcc, 32, v15
649; GISEL-NEXT:    v_ffbh_u32_e32 v26, v5
650; GISEL-NEXT:    v_ffbh_u32_e32 v27, v4
651; GISEL-NEXT:    v_add_i32_e32 v17, vcc, 32, v17
652; GISEL-NEXT:    v_ffbh_u32_e32 v28, v13
653; GISEL-NEXT:    v_ffbh_u32_e32 v29, v12
654; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
655; GISEL-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[2:3]
656; GISEL-NEXT:    v_min_u32_e32 v0, v14, v15
657; GISEL-NEXT:    v_add_i32_e64 v1, s[6:7], 32, v27
658; GISEL-NEXT:    v_min_u32_e32 v2, v16, v17
659; GISEL-NEXT:    v_add_i32_e64 v3, s[6:7], 32, v29
660; GISEL-NEXT:    v_add_i32_e64 v0, s[6:7], 64, v0
661; GISEL-NEXT:    v_min_u32_e32 v1, v26, v1
662; GISEL-NEXT:    v_add_i32_e64 v2, s[6:7], 64, v2
663; GISEL-NEXT:    v_min_u32_e32 v3, v28, v3
664; GISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
665; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
666; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[4:5]
667; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
668; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[12:13]
669; GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc
670; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
671; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], 0, 0, vcc
672; GISEL-NEXT:    v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
673; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
674; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[2:3], v[10:11]
675; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
676; GISEL-NEXT:    v_xor_b32_e32 v10, 0x7f, v2
677; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[0:1]
678; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
679; GISEL-NEXT:    v_or_b32_e32 v10, v10, v0
680; GISEL-NEXT:    v_or_b32_e32 v11, v3, v1
681; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
682; GISEL-NEXT:    v_cndmask_b32_e32 v15, v16, v15, vcc
683; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
684; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
685; GISEL-NEXT:    v_or_b32_e32 v11, v14, v15
686; GISEL-NEXT:    v_and_b32_e32 v14, 1, v11
687; GISEL-NEXT:    v_or_b32_e32 v10, v11, v10
688; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v14
689; GISEL-NEXT:    v_cndmask_b32_e64 v14, v6, 0, vcc
690; GISEL-NEXT:    v_and_b32_e32 v16, 1, v10
691; GISEL-NEXT:    v_cndmask_b32_e64 v15, v7, 0, vcc
692; GISEL-NEXT:    v_cndmask_b32_e64 v10, v12, 0, vcc
693; GISEL-NEXT:    v_cndmask_b32_e64 v11, v13, 0, vcc
694; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v16
695; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
696; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
697; GISEL-NEXT:    s_cbranch_execz .LBB0_12
698; GISEL-NEXT:  ; %bb.7: ; %udiv-bb1
699; GISEL-NEXT:    v_add_i32_e32 v26, vcc, 1, v2
700; GISEL-NEXT:    v_addc_u32_e64 v27, s[4:5], 0, v3, vcc
701; GISEL-NEXT:    v_sub_i32_e32 v30, vcc, 0x7f, v2
702; GISEL-NEXT:    v_not_b32_e32 v2, 63
703; GISEL-NEXT:    v_addc_u32_e64 v28, vcc, 0, v0, s[4:5]
704; GISEL-NEXT:    v_addc_u32_e32 v29, vcc, 0, v1, vcc
705; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v30, v2
706; GISEL-NEXT:    v_sub_i32_e64 v10, s[4:5], 64, v30
707; GISEL-NEXT:    v_lshl_b64 v[0:1], v[6:7], v30
708; GISEL-NEXT:    v_lshl_b64 v[2:3], v[12:13], v30
709; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
710; GISEL-NEXT:    v_lshr_b64 v[10:11], v[6:7], v10
711; GISEL-NEXT:    v_lshl_b64 v[16:17], v[6:7], v14
712; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v30
713; GISEL-NEXT:    v_cndmask_b32_e32 v14, 0, v0, vcc
714; GISEL-NEXT:    v_cndmask_b32_e32 v15, 0, v1, vcc
715; GISEL-NEXT:    v_or_b32_e32 v0, v10, v2
716; GISEL-NEXT:    v_or_b32_e32 v1, v11, v3
717; GISEL-NEXT:    v_cndmask_b32_e32 v0, v16, v0, vcc
718; GISEL-NEXT:    v_cndmask_b32_e32 v1, v17, v1, vcc
719; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v30
720; GISEL-NEXT:    v_cndmask_b32_e32 v10, v0, v12, vcc
721; GISEL-NEXT:    v_cndmask_b32_e32 v11, v1, v13, vcc
722; GISEL-NEXT:    s_mov_b64 s[10:11], s[8:9]
723; GISEL-NEXT:    v_mov_b32_e32 v0, s8
724; GISEL-NEXT:    v_mov_b32_e32 v1, s9
725; GISEL-NEXT:    v_mov_b32_e32 v2, s10
726; GISEL-NEXT:    v_mov_b32_e32 v3, s11
727; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
728; GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[6:7]
729; GISEL-NEXT:    s_cbranch_execz .LBB0_11
730; GISEL-NEXT:  ; %bb.8: ; %udiv-preheader
731; GISEL-NEXT:    v_add_i32_e32 v32, vcc, 0xffffffc0, v26
732; GISEL-NEXT:    v_sub_i32_e32 v16, vcc, 64, v26
733; GISEL-NEXT:    v_lshr_b64 v[0:1], v[12:13], v26
734; GISEL-NEXT:    v_lshr_b64 v[2:3], v[6:7], v26
735; GISEL-NEXT:    s_mov_b64 s[4:5], 0
736; GISEL-NEXT:    v_add_i32_e32 v30, vcc, -1, v20
737; GISEL-NEXT:    v_addc_u32_e32 v31, vcc, -1, v21, vcc
738; GISEL-NEXT:    v_lshl_b64 v[16:17], v[12:13], v16
739; GISEL-NEXT:    v_lshr_b64 v[12:13], v[12:13], v32
740; GISEL-NEXT:    v_addc_u32_e32 v32, vcc, -1, v4, vcc
741; GISEL-NEXT:    v_addc_u32_e32 v33, vcc, -1, v5, vcc
742; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
743; GISEL-NEXT:    v_or_b32_e32 v2, v2, v16
744; GISEL-NEXT:    v_or_b32_e32 v3, v3, v17
745; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v26
746; GISEL-NEXT:    v_cndmask_b32_e32 v2, v12, v2, vcc
747; GISEL-NEXT:    v_cndmask_b32_e32 v3, v13, v3, vcc
748; GISEL-NEXT:    v_cndmask_b32_e32 v16, 0, v0, vcc
749; GISEL-NEXT:    v_cndmask_b32_e32 v17, 0, v1, vcc
750; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v26
751; GISEL-NEXT:    v_cndmask_b32_e32 v12, v2, v6, vcc
752; GISEL-NEXT:    v_cndmask_b32_e32 v13, v3, v7, vcc
753; GISEL-NEXT:    v_mov_b32_e32 v7, 0
754; GISEL-NEXT:    v_mov_b32_e32 v0, s4
755; GISEL-NEXT:    v_mov_b32_e32 v1, s5
756; GISEL-NEXT:    v_mov_b32_e32 v2, s6
757; GISEL-NEXT:    v_mov_b32_e32 v3, s7
758; GISEL-NEXT:  .LBB0_9: ; %udiv-do-while
759; GISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
760; GISEL-NEXT:    v_lshl_b64 v[2:3], v[12:13], 1
761; GISEL-NEXT:    v_lshl_b64 v[16:17], v[16:17], 1
762; GISEL-NEXT:    v_lshrrev_b32_e32 v6, 31, v13
763; GISEL-NEXT:    v_lshrrev_b32_e32 v34, 31, v11
764; GISEL-NEXT:    v_lshl_b64 v[12:13], v[14:15], 1
765; GISEL-NEXT:    v_lshl_b64 v[10:11], v[10:11], 1
766; GISEL-NEXT:    v_lshrrev_b32_e32 v14, 31, v15
767; GISEL-NEXT:    v_add_i32_e32 v26, vcc, -1, v26
768; GISEL-NEXT:    v_addc_u32_e32 v27, vcc, -1, v27, vcc
769; GISEL-NEXT:    v_or_b32_e32 v16, v16, v6
770; GISEL-NEXT:    v_or_b32_e32 v2, v2, v34
771; GISEL-NEXT:    v_or_b32_e32 v10, v10, v14
772; GISEL-NEXT:    v_or_b32_e32 v14, v0, v12
773; GISEL-NEXT:    v_or_b32_e32 v15, v1, v13
774; GISEL-NEXT:    v_addc_u32_e32 v28, vcc, -1, v28, vcc
775; GISEL-NEXT:    v_addc_u32_e32 v29, vcc, -1, v29, vcc
776; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v30, v2
777; GISEL-NEXT:    v_subb_u32_e32 v0, vcc, v31, v3, vcc
778; GISEL-NEXT:    v_or_b32_e32 v0, v26, v28
779; GISEL-NEXT:    v_or_b32_e32 v1, v27, v29
780; GISEL-NEXT:    v_subb_u32_e32 v6, vcc, v32, v16, vcc
781; GISEL-NEXT:    v_subb_u32_e32 v6, vcc, v33, v17, vcc
782; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
783; GISEL-NEXT:    v_ashrrev_i32_e32 v0, 31, v6
784; GISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
785; GISEL-NEXT:    v_and_b32_e32 v6, 1, v0
786; GISEL-NEXT:    v_and_b32_e32 v12, v0, v20
787; GISEL-NEXT:    v_and_b32_e32 v13, v0, v21
788; GISEL-NEXT:    v_and_b32_e32 v34, v0, v4
789; GISEL-NEXT:    v_and_b32_e32 v35, v0, v5
790; GISEL-NEXT:    v_mov_b32_e32 v0, v6
791; GISEL-NEXT:    v_mov_b32_e32 v1, v7
792; GISEL-NEXT:    v_sub_i32_e32 v12, vcc, v2, v12
793; GISEL-NEXT:    v_subb_u32_e32 v13, vcc, v3, v13, vcc
794; GISEL-NEXT:    v_subb_u32_e32 v16, vcc, v16, v34, vcc
795; GISEL-NEXT:    v_subb_u32_e32 v17, vcc, v17, v35, vcc
796; GISEL-NEXT:    s_andn2_b64 exec, exec, s[4:5]
797; GISEL-NEXT:    s_cbranch_execnz .LBB0_9
798; GISEL-NEXT:  ; %bb.10: ; %Flow
799; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
800; GISEL-NEXT:  .LBB0_11: ; %Flow11
801; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
802; GISEL-NEXT:    v_lshl_b64 v[2:3], v[14:15], 1
803; GISEL-NEXT:    v_lshl_b64 v[10:11], v[10:11], 1
804; GISEL-NEXT:    v_lshrrev_b32_e32 v4, 31, v15
805; GISEL-NEXT:    v_or_b32_e32 v10, v10, v4
806; GISEL-NEXT:    v_or_b32_e32 v14, v0, v2
807; GISEL-NEXT:    v_or_b32_e32 v15, v1, v3
808; GISEL-NEXT:  .LBB0_12: ; %Flow12
809; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
810; GISEL-NEXT:    v_xor_b32_e32 v3, v25, v24
811; GISEL-NEXT:    v_xor_b32_e32 v7, v19, v18
812; GISEL-NEXT:    v_xor_b32_e32 v0, v22, v3
813; GISEL-NEXT:    v_xor_b32_e32 v1, v23, v3
814; GISEL-NEXT:    v_xor_b32_e32 v2, v8, v3
815; GISEL-NEXT:    v_xor_b32_e32 v6, v9, v3
816; GISEL-NEXT:    v_xor_b32_e32 v4, v14, v7
817; GISEL-NEXT:    v_xor_b32_e32 v5, v15, v7
818; GISEL-NEXT:    v_xor_b32_e32 v8, v10, v7
819; GISEL-NEXT:    v_xor_b32_e32 v9, v11, v7
820; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
821; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
822; GISEL-NEXT:    v_sub_i32_e64 v4, s[4:5], v4, v7
823; GISEL-NEXT:    v_subb_u32_e64 v5, s[4:5], v5, v7, s[4:5]
824; GISEL-NEXT:    v_subb_u32_e32 v2, vcc, v2, v3, vcc
825; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v6, v3, vcc
826; GISEL-NEXT:    v_subb_u32_e64 v6, vcc, v8, v7, s[4:5]
827; GISEL-NEXT:    v_subb_u32_e32 v7, vcc, v9, v7, vcc
828; GISEL-NEXT:    s_setpc_b64 s[30:31]
829  %shl = sdiv <2 x i128> %lhs, %rhs
830  ret <2 x i128> %shl
831}
832
833define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
834; SDAG-LABEL: v_udiv_v2i128_vv:
835; SDAG:       ; %bb.0: ; %_udiv-special-cases_udiv-special-cases
836; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
837; SDAG-NEXT:    v_or_b32_e32 v17, v9, v11
838; SDAG-NEXT:    v_or_b32_e32 v16, v8, v10
839; SDAG-NEXT:    v_or_b32_e32 v19, v1, v3
840; SDAG-NEXT:    v_or_b32_e32 v18, v0, v2
841; SDAG-NEXT:    v_ffbh_u32_e32 v20, v10
842; SDAG-NEXT:    v_ffbh_u32_e32 v21, v11
843; SDAG-NEXT:    v_ffbh_u32_e32 v22, v8
844; SDAG-NEXT:    v_ffbh_u32_e32 v23, v9
845; SDAG-NEXT:    v_ffbh_u32_e32 v24, v2
846; SDAG-NEXT:    v_ffbh_u32_e32 v25, v3
847; SDAG-NEXT:    v_ffbh_u32_e32 v26, v0
848; SDAG-NEXT:    v_ffbh_u32_e32 v27, v1
849; SDAG-NEXT:    v_mov_b32_e32 v28, 0
850; SDAG-NEXT:    s_mov_b64 s[8:9], 0x7f
851; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[16:17]
852; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[18:19]
853; SDAG-NEXT:    v_add_i32_e64 v16, s[6:7], 32, v20
854; SDAG-NEXT:    v_add_i32_e64 v17, s[6:7], 32, v22
855; SDAG-NEXT:    v_add_i32_e64 v18, s[6:7], 32, v24
856; SDAG-NEXT:    v_add_i32_e64 v19, s[6:7], 32, v26
857; SDAG-NEXT:    v_min_u32_e32 v16, v16, v21
858; SDAG-NEXT:    v_min_u32_e32 v17, v17, v23
859; SDAG-NEXT:    v_min_u32_e32 v18, v18, v25
860; SDAG-NEXT:    v_min_u32_e32 v19, v19, v27
861; SDAG-NEXT:    s_or_b64 s[6:7], vcc, s[4:5]
862; SDAG-NEXT:    v_add_i32_e32 v17, vcc, 64, v17
863; SDAG-NEXT:    v_addc_u32_e64 v20, s[4:5], 0, 0, vcc
864; SDAG-NEXT:    v_add_i32_e32 v19, vcc, 64, v19
865; SDAG-NEXT:    v_addc_u32_e64 v21, s[4:5], 0, 0, vcc
866; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
867; SDAG-NEXT:    v_cndmask_b32_e64 v20, v20, 0, vcc
868; SDAG-NEXT:    v_cndmask_b32_e32 v16, v17, v16, vcc
869; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
870; SDAG-NEXT:    v_cndmask_b32_e64 v17, v21, 0, vcc
871; SDAG-NEXT:    v_cndmask_b32_e32 v18, v19, v18, vcc
872; SDAG-NEXT:    v_sub_i32_e32 v22, vcc, v16, v18
873; SDAG-NEXT:    v_subb_u32_e32 v23, vcc, v20, v17, vcc
874; SDAG-NEXT:    v_xor_b32_e32 v16, 0x7f, v22
875; SDAG-NEXT:    v_subbrev_u32_e32 v24, vcc, 0, v28, vcc
876; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[8:9], v[22:23]
877; SDAG-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
878; SDAG-NEXT:    v_subbrev_u32_e32 v25, vcc, 0, v28, vcc
879; SDAG-NEXT:    v_or_b32_e32 v16, v16, v24
880; SDAG-NEXT:    v_or_b32_e32 v17, v23, v25
881; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[24:25]
882; SDAG-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
883; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[16:17]
884; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[24:25]
885; SDAG-NEXT:    v_cndmask_b32_e64 v16, v19, v18, s[4:5]
886; SDAG-NEXT:    v_and_b32_e32 v16, 1, v16
887; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 1, v16
888; SDAG-NEXT:    s_or_b64 s[4:5], s[6:7], s[4:5]
889; SDAG-NEXT:    v_cndmask_b32_e64 v16, v3, 0, s[4:5]
890; SDAG-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
891; SDAG-NEXT:    v_cndmask_b32_e64 v17, v2, 0, s[4:5]
892; SDAG-NEXT:    v_cndmask_b32_e64 v18, v1, 0, s[4:5]
893; SDAG-NEXT:    s_and_b64 s[8:9], s[6:7], vcc
894; SDAG-NEXT:    v_cndmask_b32_e64 v19, v0, 0, s[4:5]
895; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[8:9]
896; SDAG-NEXT:    s_cbranch_execz .LBB1_6
897; SDAG-NEXT:  ; %bb.1: ; %udiv-bb15
898; SDAG-NEXT:    v_add_i32_e32 v26, vcc, 1, v22
899; SDAG-NEXT:    v_sub_i32_e64 v16, s[4:5], 63, v22
900; SDAG-NEXT:    v_mov_b32_e32 v20, 0
901; SDAG-NEXT:    v_mov_b32_e32 v21, 0
902; SDAG-NEXT:    v_addc_u32_e32 v27, vcc, 0, v23, vcc
903; SDAG-NEXT:    v_lshl_b64 v[16:17], v[0:1], v16
904; SDAG-NEXT:    v_addc_u32_e32 v28, vcc, 0, v24, vcc
905; SDAG-NEXT:    v_addc_u32_e32 v29, vcc, 0, v25, vcc
906; SDAG-NEXT:    v_or_b32_e32 v18, v26, v28
907; SDAG-NEXT:    v_sub_i32_e32 v30, vcc, 0x7f, v22
908; SDAG-NEXT:    v_or_b32_e32 v19, v27, v29
909; SDAG-NEXT:    v_lshl_b64 v[22:23], v[2:3], v30
910; SDAG-NEXT:    v_sub_i32_e32 v31, vcc, 64, v30
911; SDAG-NEXT:    v_lshl_b64 v[24:25], v[0:1], v30
912; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[18:19]
913; SDAG-NEXT:    v_lshr_b64 v[18:19], v[0:1], v31
914; SDAG-NEXT:    v_or_b32_e32 v19, v23, v19
915; SDAG-NEXT:    v_or_b32_e32 v18, v22, v18
916; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v30
917; SDAG-NEXT:    v_cndmask_b32_e64 v19, v17, v19, s[4:5]
918; SDAG-NEXT:    v_cndmask_b32_e64 v18, v16, v18, s[4:5]
919; SDAG-NEXT:    v_cndmask_b32_e64 v17, 0, v25, s[4:5]
920; SDAG-NEXT:    v_cndmask_b32_e64 v16, 0, v24, s[4:5]
921; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v30
922; SDAG-NEXT:    v_cndmask_b32_e64 v19, v19, v3, s[4:5]
923; SDAG-NEXT:    v_cndmask_b32_e64 v18, v18, v2, s[4:5]
924; SDAG-NEXT:    v_mov_b32_e32 v22, 0
925; SDAG-NEXT:    v_mov_b32_e32 v23, 0
926; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
927; SDAG-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
928; SDAG-NEXT:    s_cbranch_execz .LBB1_5
929; SDAG-NEXT:  ; %bb.2: ; %udiv-preheader4
930; SDAG-NEXT:    v_lshr_b64 v[20:21], v[0:1], v26
931; SDAG-NEXT:    v_sub_i32_e32 v22, vcc, 64, v26
932; SDAG-NEXT:    v_lshl_b64 v[22:23], v[2:3], v22
933; SDAG-NEXT:    v_or_b32_e32 v23, v21, v23
934; SDAG-NEXT:    v_or_b32_e32 v22, v20, v22
935; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v26
936; SDAG-NEXT:    v_subrev_i32_e64 v20, s[4:5], 64, v26
937; SDAG-NEXT:    v_lshr_b64 v[20:21], v[2:3], v20
938; SDAG-NEXT:    v_cndmask_b32_e32 v21, v21, v23, vcc
939; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v26
940; SDAG-NEXT:    v_cndmask_b32_e64 v1, v21, v1, s[4:5]
941; SDAG-NEXT:    v_cndmask_b32_e32 v20, v20, v22, vcc
942; SDAG-NEXT:    v_cndmask_b32_e64 v0, v20, v0, s[4:5]
943; SDAG-NEXT:    v_lshr_b64 v[2:3], v[2:3], v26
944; SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
945; SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
946; SDAG-NEXT:    v_add_i32_e32 v30, vcc, -1, v8
947; SDAG-NEXT:    v_addc_u32_e32 v31, vcc, -1, v9, vcc
948; SDAG-NEXT:    v_addc_u32_e32 v32, vcc, -1, v10, vcc
949; SDAG-NEXT:    v_addc_u32_e32 v33, vcc, -1, v11, vcc
950; SDAG-NEXT:    s_mov_b64 s[4:5], 0
951; SDAG-NEXT:    v_mov_b32_e32 v24, 0
952; SDAG-NEXT:    v_mov_b32_e32 v25, 0
953; SDAG-NEXT:    v_mov_b32_e32 v22, 0
954; SDAG-NEXT:    v_mov_b32_e32 v23, 0
955; SDAG-NEXT:    v_mov_b32_e32 v21, 0
956; SDAG-NEXT:  .LBB1_3: ; %udiv-do-while3
957; SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
958; SDAG-NEXT:    v_lshrrev_b32_e32 v34, 31, v17
959; SDAG-NEXT:    v_lshl_b64 v[16:17], v[16:17], 1
960; SDAG-NEXT:    v_or_b32_e32 v17, v25, v17
961; SDAG-NEXT:    v_or_b32_e32 v16, v24, v16
962; SDAG-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
963; SDAG-NEXT:    v_lshrrev_b32_e32 v20, 31, v1
964; SDAG-NEXT:    v_lshl_b64 v[0:1], v[0:1], 1
965; SDAG-NEXT:    v_or_b32_e32 v2, v2, v20
966; SDAG-NEXT:    v_lshrrev_b32_e32 v20, 31, v19
967; SDAG-NEXT:    v_or_b32_e32 v0, v0, v20
968; SDAG-NEXT:    v_sub_i32_e32 v20, vcc, v30, v0
969; SDAG-NEXT:    v_subb_u32_e32 v20, vcc, v31, v1, vcc
970; SDAG-NEXT:    v_subb_u32_e32 v20, vcc, v32, v2, vcc
971; SDAG-NEXT:    v_subb_u32_e32 v20, vcc, v33, v3, vcc
972; SDAG-NEXT:    v_ashrrev_i32_e32 v20, 31, v20
973; SDAG-NEXT:    v_and_b32_e32 v24, v20, v8
974; SDAG-NEXT:    v_sub_i32_e32 v0, vcc, v0, v24
975; SDAG-NEXT:    v_and_b32_e32 v24, v20, v9
976; SDAG-NEXT:    v_subb_u32_e32 v1, vcc, v1, v24, vcc
977; SDAG-NEXT:    v_and_b32_e32 v24, v20, v10
978; SDAG-NEXT:    v_subb_u32_e32 v2, vcc, v2, v24, vcc
979; SDAG-NEXT:    v_and_b32_e32 v24, v20, v11
980; SDAG-NEXT:    v_subb_u32_e32 v3, vcc, v3, v24, vcc
981; SDAG-NEXT:    v_add_i32_e32 v26, vcc, -1, v26
982; SDAG-NEXT:    v_addc_u32_e32 v27, vcc, -1, v27, vcc
983; SDAG-NEXT:    v_addc_u32_e32 v28, vcc, -1, v28, vcc
984; SDAG-NEXT:    v_addc_u32_e32 v29, vcc, -1, v29, vcc
985; SDAG-NEXT:    v_or_b32_e32 v24, v26, v28
986; SDAG-NEXT:    v_or_b32_e32 v25, v27, v29
987; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[24:25]
988; SDAG-NEXT:    v_and_b32_e32 v20, 1, v20
989; SDAG-NEXT:    v_lshl_b64 v[18:19], v[18:19], 1
990; SDAG-NEXT:    v_or_b32_e32 v18, v18, v34
991; SDAG-NEXT:    v_or_b32_e32 v19, v23, v19
992; SDAG-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
993; SDAG-NEXT:    v_or_b32_e32 v18, v22, v18
994; SDAG-NEXT:    v_mov_b32_e32 v25, v21
995; SDAG-NEXT:    v_mov_b32_e32 v24, v20
996; SDAG-NEXT:    s_andn2_b64 exec, exec, s[4:5]
997; SDAG-NEXT:    s_cbranch_execnz .LBB1_3
998; SDAG-NEXT:  ; %bb.4: ; %Flow13
999; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
1000; SDAG-NEXT:  .LBB1_5: ; %Flow14
1001; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
1002; SDAG-NEXT:    v_lshl_b64 v[0:1], v[18:19], 1
1003; SDAG-NEXT:    v_lshrrev_b32_e32 v8, 31, v17
1004; SDAG-NEXT:    v_lshl_b64 v[2:3], v[16:17], 1
1005; SDAG-NEXT:    v_or_b32_e32 v0, v0, v8
1006; SDAG-NEXT:    v_or_b32_e32 v16, v23, v1
1007; SDAG-NEXT:    v_or_b32_e32 v18, v21, v3
1008; SDAG-NEXT:    v_or_b32_e32 v17, v22, v0
1009; SDAG-NEXT:    v_or_b32_e32 v19, v20, v2
1010; SDAG-NEXT:  .LBB1_6: ; %Flow16
1011; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
1012; SDAG-NEXT:    v_or_b32_e32 v1, v13, v15
1013; SDAG-NEXT:    v_or_b32_e32 v0, v12, v14
1014; SDAG-NEXT:    v_or_b32_e32 v3, v5, v7
1015; SDAG-NEXT:    v_or_b32_e32 v2, v4, v6
1016; SDAG-NEXT:    v_ffbh_u32_e32 v8, v14
1017; SDAG-NEXT:    v_ffbh_u32_e32 v9, v15
1018; SDAG-NEXT:    v_ffbh_u32_e32 v10, v12
1019; SDAG-NEXT:    v_ffbh_u32_e32 v11, v13
1020; SDAG-NEXT:    v_ffbh_u32_e32 v20, v6
1021; SDAG-NEXT:    v_ffbh_u32_e32 v21, v7
1022; SDAG-NEXT:    v_ffbh_u32_e32 v22, v4
1023; SDAG-NEXT:    v_ffbh_u32_e32 v23, v5
1024; SDAG-NEXT:    v_mov_b32_e32 v24, 0
1025; SDAG-NEXT:    s_mov_b64 s[8:9], 0x7f
1026; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
1027; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[2:3]
1028; SDAG-NEXT:    v_add_i32_e64 v0, s[6:7], 32, v8
1029; SDAG-NEXT:    v_add_i32_e64 v1, s[6:7], 32, v10
1030; SDAG-NEXT:    v_add_i32_e64 v2, s[6:7], 32, v20
1031; SDAG-NEXT:    v_add_i32_e64 v3, s[6:7], 32, v22
1032; SDAG-NEXT:    s_or_b64 s[6:7], vcc, s[4:5]
1033; SDAG-NEXT:    v_min_u32_e32 v0, v0, v9
1034; SDAG-NEXT:    v_min_u32_e32 v1, v1, v11
1035; SDAG-NEXT:    v_min_u32_e32 v2, v2, v21
1036; SDAG-NEXT:    v_min_u32_e32 v3, v3, v23
1037; SDAG-NEXT:    v_add_i32_e32 v1, vcc, 64, v1
1038; SDAG-NEXT:    v_addc_u32_e64 v8, s[4:5], 0, 0, vcc
1039; SDAG-NEXT:    v_add_i32_e32 v3, vcc, 64, v3
1040; SDAG-NEXT:    v_addc_u32_e64 v9, s[4:5], 0, 0, vcc
1041; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[14:15]
1042; SDAG-NEXT:    v_cndmask_b32_e64 v8, v8, 0, vcc
1043; SDAG-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1044; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
1045; SDAG-NEXT:    v_cndmask_b32_e64 v1, v9, 0, vcc
1046; SDAG-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
1047; SDAG-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
1048; SDAG-NEXT:    v_subb_u32_e32 v1, vcc, v8, v1, vcc
1049; SDAG-NEXT:    v_xor_b32_e32 v2, 0x7f, v0
1050; SDAG-NEXT:    v_subbrev_u32_e32 v20, vcc, 0, v24, vcc
1051; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[8:9], v[0:1]
1052; SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
1053; SDAG-NEXT:    v_subbrev_u32_e32 v21, vcc, 0, v24, vcc
1054; SDAG-NEXT:    v_or_b32_e32 v2, v2, v20
1055; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[20:21]
1056; SDAG-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1057; SDAG-NEXT:    v_or_b32_e32 v3, v1, v21
1058; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[20:21]
1059; SDAG-NEXT:    v_cndmask_b32_e32 v8, v9, v8, vcc
1060; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
1061; SDAG-NEXT:    v_and_b32_e32 v2, 1, v8
1062; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 1, v2
1063; SDAG-NEXT:    s_or_b64 s[4:5], s[6:7], s[4:5]
1064; SDAG-NEXT:    v_cndmask_b32_e64 v8, v7, 0, s[4:5]
1065; SDAG-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
1066; SDAG-NEXT:    v_cndmask_b32_e64 v9, v6, 0, s[4:5]
1067; SDAG-NEXT:    v_cndmask_b32_e64 v10, v5, 0, s[4:5]
1068; SDAG-NEXT:    v_cndmask_b32_e64 v11, v4, 0, s[4:5]
1069; SDAG-NEXT:    s_and_b64 s[4:5], s[6:7], vcc
1070; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
1071; SDAG-NEXT:    s_cbranch_execz .LBB1_12
1072; SDAG-NEXT:  ; %bb.7: ; %udiv-bb1
1073; SDAG-NEXT:    v_add_i32_e32 v22, vcc, 1, v0
1074; SDAG-NEXT:    v_sub_i32_e64 v8, s[4:5], 63, v0
1075; SDAG-NEXT:    v_mov_b32_e32 v2, 0
1076; SDAG-NEXT:    v_mov_b32_e32 v3, 0
1077; SDAG-NEXT:    v_addc_u32_e32 v23, vcc, 0, v1, vcc
1078; SDAG-NEXT:    v_lshl_b64 v[8:9], v[4:5], v8
1079; SDAG-NEXT:    v_addc_u32_e32 v24, vcc, 0, v20, vcc
1080; SDAG-NEXT:    v_addc_u32_e32 v25, vcc, 0, v21, vcc
1081; SDAG-NEXT:    v_or_b32_e32 v10, v22, v24
1082; SDAG-NEXT:    v_sub_i32_e32 v26, vcc, 0x7f, v0
1083; SDAG-NEXT:    v_or_b32_e32 v11, v23, v25
1084; SDAG-NEXT:    v_lshl_b64 v[0:1], v[6:7], v26
1085; SDAG-NEXT:    v_sub_i32_e32 v27, vcc, 64, v26
1086; SDAG-NEXT:    v_lshl_b64 v[20:21], v[4:5], v26
1087; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
1088; SDAG-NEXT:    v_lshr_b64 v[10:11], v[4:5], v27
1089; SDAG-NEXT:    v_or_b32_e32 v1, v1, v11
1090; SDAG-NEXT:    v_or_b32_e32 v0, v0, v10
1091; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v26
1092; SDAG-NEXT:    v_cndmask_b32_e64 v1, v9, v1, s[4:5]
1093; SDAG-NEXT:    v_cndmask_b32_e64 v0, v8, v0, s[4:5]
1094; SDAG-NEXT:    v_cndmask_b32_e64 v9, 0, v21, s[4:5]
1095; SDAG-NEXT:    v_cndmask_b32_e64 v8, 0, v20, s[4:5]
1096; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v26
1097; SDAG-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[4:5]
1098; SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v6, s[4:5]
1099; SDAG-NEXT:    v_mov_b32_e32 v20, 0
1100; SDAG-NEXT:    v_mov_b32_e32 v21, 0
1101; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1102; SDAG-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
1103; SDAG-NEXT:    s_cbranch_execz .LBB1_11
1104; SDAG-NEXT:  ; %bb.8: ; %udiv-preheader
1105; SDAG-NEXT:    v_lshr_b64 v[2:3], v[4:5], v22
1106; SDAG-NEXT:    v_sub_i32_e32 v27, vcc, 64, v22
1107; SDAG-NEXT:    v_subrev_i32_e32 v28, vcc, 64, v22
1108; SDAG-NEXT:    v_lshr_b64 v[29:30], v[6:7], v22
1109; SDAG-NEXT:    v_add_i32_e32 v26, vcc, -1, v12
1110; SDAG-NEXT:    s_mov_b64 s[10:11], 0
1111; SDAG-NEXT:    v_mov_b32_e32 v10, 0
1112; SDAG-NEXT:    v_mov_b32_e32 v11, 0
1113; SDAG-NEXT:    v_mov_b32_e32 v20, 0
1114; SDAG-NEXT:    v_mov_b32_e32 v21, 0
1115; SDAG-NEXT:    v_lshl_b64 v[31:32], v[6:7], v27
1116; SDAG-NEXT:    v_lshr_b64 v[6:7], v[6:7], v28
1117; SDAG-NEXT:    v_addc_u32_e32 v27, vcc, -1, v13, vcc
1118; SDAG-NEXT:    v_or_b32_e32 v3, v3, v32
1119; SDAG-NEXT:    v_or_b32_e32 v2, v2, v31
1120; SDAG-NEXT:    v_addc_u32_e32 v28, vcc, -1, v14, vcc
1121; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v22
1122; SDAG-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s[4:5]
1123; SDAG-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[4:5]
1124; SDAG-NEXT:    v_cndmask_b32_e64 v7, 0, v30, s[4:5]
1125; SDAG-NEXT:    v_cndmask_b32_e64 v6, 0, v29, s[4:5]
1126; SDAG-NEXT:    v_addc_u32_e32 v29, vcc, -1, v15, vcc
1127; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v22
1128; SDAG-NEXT:    v_cndmask_b32_e32 v5, v3, v5, vcc
1129; SDAG-NEXT:    v_cndmask_b32_e32 v4, v2, v4, vcc
1130; SDAG-NEXT:    v_mov_b32_e32 v3, 0
1131; SDAG-NEXT:  .LBB1_9: ; %udiv-do-while
1132; SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
1133; SDAG-NEXT:    v_lshl_b64 v[6:7], v[6:7], 1
1134; SDAG-NEXT:    v_lshrrev_b32_e32 v2, 31, v5
1135; SDAG-NEXT:    v_lshl_b64 v[4:5], v[4:5], 1
1136; SDAG-NEXT:    v_lshrrev_b32_e32 v30, 31, v1
1137; SDAG-NEXT:    v_lshl_b64 v[0:1], v[0:1], 1
1138; SDAG-NEXT:    v_lshrrev_b32_e32 v31, 31, v9
1139; SDAG-NEXT:    v_lshl_b64 v[8:9], v[8:9], 1
1140; SDAG-NEXT:    v_or_b32_e32 v6, v6, v2
1141; SDAG-NEXT:    v_or_b32_e32 v2, v4, v30
1142; SDAG-NEXT:    v_or_b32_e32 v0, v0, v31
1143; SDAG-NEXT:    v_or_b32_e32 v1, v21, v1
1144; SDAG-NEXT:    v_sub_i32_e32 v4, vcc, v26, v2
1145; SDAG-NEXT:    v_subb_u32_e32 v4, vcc, v27, v5, vcc
1146; SDAG-NEXT:    v_subb_u32_e32 v4, vcc, v28, v6, vcc
1147; SDAG-NEXT:    v_subb_u32_e32 v4, vcc, v29, v7, vcc
1148; SDAG-NEXT:    v_ashrrev_i32_e32 v30, 31, v4
1149; SDAG-NEXT:    v_and_b32_e32 v31, v30, v13
1150; SDAG-NEXT:    v_and_b32_e32 v4, v30, v12
1151; SDAG-NEXT:    v_sub_i32_e32 v4, vcc, v2, v4
1152; SDAG-NEXT:    v_subb_u32_e32 v5, vcc, v5, v31, vcc
1153; SDAG-NEXT:    v_or_b32_e32 v9, v11, v9
1154; SDAG-NEXT:    v_or_b32_e32 v0, v20, v0
1155; SDAG-NEXT:    v_and_b32_e32 v2, 1, v30
1156; SDAG-NEXT:    v_and_b32_e32 v11, v30, v15
1157; SDAG-NEXT:    v_and_b32_e32 v30, v30, v14
1158; SDAG-NEXT:    v_subb_u32_e32 v6, vcc, v6, v30, vcc
1159; SDAG-NEXT:    v_subb_u32_e32 v7, vcc, v7, v11, vcc
1160; SDAG-NEXT:    v_add_i32_e32 v22, vcc, -1, v22
1161; SDAG-NEXT:    v_addc_u32_e32 v23, vcc, -1, v23, vcc
1162; SDAG-NEXT:    v_addc_u32_e32 v24, vcc, -1, v24, vcc
1163; SDAG-NEXT:    v_addc_u32_e32 v25, vcc, -1, v25, vcc
1164; SDAG-NEXT:    v_or_b32_e32 v31, v23, v25
1165; SDAG-NEXT:    v_or_b32_e32 v30, v22, v24
1166; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[30:31]
1167; SDAG-NEXT:    s_or_b64 s[10:11], vcc, s[10:11]
1168; SDAG-NEXT:    v_or_b32_e32 v8, v10, v8
1169; SDAG-NEXT:    v_mov_b32_e32 v11, v3
1170; SDAG-NEXT:    v_mov_b32_e32 v10, v2
1171; SDAG-NEXT:    s_andn2_b64 exec, exec, s[10:11]
1172; SDAG-NEXT:    s_cbranch_execnz .LBB1_9
1173; SDAG-NEXT:  ; %bb.10: ; %Flow
1174; SDAG-NEXT:    s_or_b64 exec, exec, s[10:11]
1175; SDAG-NEXT:  .LBB1_11: ; %Flow11
1176; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
1177; SDAG-NEXT:    v_lshl_b64 v[0:1], v[0:1], 1
1178; SDAG-NEXT:    v_lshrrev_b32_e32 v6, 31, v9
1179; SDAG-NEXT:    v_lshl_b64 v[4:5], v[8:9], 1
1180; SDAG-NEXT:    v_or_b32_e32 v0, v0, v6
1181; SDAG-NEXT:    v_or_b32_e32 v8, v21, v1
1182; SDAG-NEXT:    v_or_b32_e32 v10, v3, v5
1183; SDAG-NEXT:    v_or_b32_e32 v9, v20, v0
1184; SDAG-NEXT:    v_or_b32_e32 v11, v2, v4
1185; SDAG-NEXT:  .LBB1_12: ; %Flow12
1186; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
1187; SDAG-NEXT:    v_mov_b32_e32 v0, v19
1188; SDAG-NEXT:    v_mov_b32_e32 v1, v18
1189; SDAG-NEXT:    v_mov_b32_e32 v2, v17
1190; SDAG-NEXT:    v_mov_b32_e32 v3, v16
1191; SDAG-NEXT:    v_mov_b32_e32 v4, v11
1192; SDAG-NEXT:    v_mov_b32_e32 v5, v10
1193; SDAG-NEXT:    v_mov_b32_e32 v6, v9
1194; SDAG-NEXT:    v_mov_b32_e32 v7, v8
1195; SDAG-NEXT:    s_setpc_b64 s[30:31]
1196;
1197; GISEL-LABEL: v_udiv_v2i128_vv:
1198; GISEL:       ; %bb.0: ; %_udiv-special-cases_udiv-special-cases
1199; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1200; GISEL-NEXT:    v_mov_b32_e32 v16, v2
1201; GISEL-NEXT:    v_mov_b32_e32 v17, v3
1202; GISEL-NEXT:    v_or_b32_e32 v2, v8, v10
1203; GISEL-NEXT:    v_or_b32_e32 v3, v9, v11
1204; GISEL-NEXT:    v_or_b32_e32 v18, v0, v16
1205; GISEL-NEXT:    v_or_b32_e32 v19, v1, v17
1206; GISEL-NEXT:    v_ffbh_u32_e32 v20, v9
1207; GISEL-NEXT:    v_ffbh_u32_e32 v21, v8
1208; GISEL-NEXT:    v_ffbh_u32_e32 v22, v11
1209; GISEL-NEXT:    v_ffbh_u32_e32 v23, v10
1210; GISEL-NEXT:    v_ffbh_u32_e32 v26, v1
1211; GISEL-NEXT:    v_ffbh_u32_e32 v27, v0
1212; GISEL-NEXT:    v_ffbh_u32_e32 v28, v16
1213; GISEL-NEXT:    v_ffbh_u32_e32 v29, v17
1214; GISEL-NEXT:    v_mov_b32_e32 v24, 0x7f
1215; GISEL-NEXT:    v_mov_b32_e32 v25, 0
1216; GISEL-NEXT:    s_mov_b64 s[8:9], 0
1217; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
1218; GISEL-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[18:19]
1219; GISEL-NEXT:    v_add_i32_e64 v2, s[6:7], 32, v21
1220; GISEL-NEXT:    v_add_i32_e64 v3, s[6:7], 32, v23
1221; GISEL-NEXT:    v_add_i32_e64 v18, s[6:7], 32, v27
1222; GISEL-NEXT:    v_add_i32_e64 v19, s[6:7], 32, v28
1223; GISEL-NEXT:    v_min_u32_e32 v2, v20, v2
1224; GISEL-NEXT:    v_min_u32_e32 v3, v22, v3
1225; GISEL-NEXT:    v_min_u32_e32 v18, v26, v18
1226; GISEL-NEXT:    v_min_u32_e32 v19, v29, v19
1227; GISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1228; GISEL-NEXT:    v_cndmask_b32_e64 v26, 0, 1, s[4:5]
1229; GISEL-NEXT:    v_add_i32_e32 v2, vcc, 64, v2
1230; GISEL-NEXT:    v_add_i32_e32 v18, vcc, 64, v18
1231; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
1232; GISEL-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
1233; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[16:17]
1234; GISEL-NEXT:    v_cndmask_b32_e32 v3, v19, v18, vcc
1235; GISEL-NEXT:    v_sub_i32_e32 v22, vcc, v2, v3
1236; GISEL-NEXT:    v_subb_u32_e64 v23, s[4:5], 0, 0, vcc
1237; GISEL-NEXT:    v_subb_u32_e64 v20, s[4:5], 0, 0, s[4:5]
1238; GISEL-NEXT:    v_subb_u32_e64 v21, s[4:5], 0, 0, s[4:5]
1239; GISEL-NEXT:    v_xor_b32_e32 v2, 0x7f, v22
1240; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[22:23], v[24:25]
1241; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
1242; GISEL-NEXT:    v_or_b32_e32 v2, v2, v20
1243; GISEL-NEXT:    v_or_b32_e32 v3, v23, v21
1244; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[20:21]
1245; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
1246; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[20:21]
1247; GISEL-NEXT:    v_cndmask_b32_e32 v18, v19, v18, vcc
1248; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
1249; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
1250; GISEL-NEXT:    v_or_b32_e32 v3, v26, v18
1251; GISEL-NEXT:    v_or_b32_e32 v2, v3, v2
1252; GISEL-NEXT:    v_and_b32_e32 v3, 1, v3
1253; GISEL-NEXT:    v_and_b32_e32 v2, 1, v2
1254; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
1255; GISEL-NEXT:    v_cndmask_b32_e64 v18, v0, 0, vcc
1256; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v2
1257; GISEL-NEXT:    v_cndmask_b32_e64 v2, v16, 0, vcc
1258; GISEL-NEXT:    v_cndmask_b32_e64 v3, v17, 0, vcc
1259; GISEL-NEXT:    s_xor_b64 s[4:5], s[4:5], -1
1260; GISEL-NEXT:    v_cndmask_b32_e64 v19, v1, 0, vcc
1261; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
1262; GISEL-NEXT:    s_cbranch_execz .LBB1_6
1263; GISEL-NEXT:  ; %bb.1: ; %udiv-bb15
1264; GISEL-NEXT:    v_add_i32_e32 v26, vcc, 1, v22
1265; GISEL-NEXT:    v_addc_u32_e64 v27, s[4:5], 0, v23, vcc
1266; GISEL-NEXT:    v_sub_i32_e32 v30, vcc, 0x7f, v22
1267; GISEL-NEXT:    v_not_b32_e32 v2, 63
1268; GISEL-NEXT:    v_addc_u32_e64 v28, vcc, 0, v20, s[4:5]
1269; GISEL-NEXT:    v_addc_u32_e32 v29, vcc, 0, v21, vcc
1270; GISEL-NEXT:    v_add_i32_e64 v22, s[4:5], v30, v2
1271; GISEL-NEXT:    v_sub_i32_e64 v20, s[4:5], 64, v30
1272; GISEL-NEXT:    v_lshl_b64 v[2:3], v[0:1], v30
1273; GISEL-NEXT:    v_lshl_b64 v[18:19], v[16:17], v30
1274; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
1275; GISEL-NEXT:    v_lshr_b64 v[20:21], v[0:1], v20
1276; GISEL-NEXT:    v_lshl_b64 v[24:25], v[0:1], v22
1277; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v30
1278; GISEL-NEXT:    v_cndmask_b32_e32 v22, 0, v2, vcc
1279; GISEL-NEXT:    v_cndmask_b32_e32 v23, 0, v3, vcc
1280; GISEL-NEXT:    v_or_b32_e32 v2, v20, v18
1281; GISEL-NEXT:    v_or_b32_e32 v3, v21, v19
1282; GISEL-NEXT:    v_cndmask_b32_e32 v2, v24, v2, vcc
1283; GISEL-NEXT:    v_cndmask_b32_e32 v3, v25, v3, vcc
1284; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v30
1285; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v16, vcc
1286; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v17, vcc
1287; GISEL-NEXT:    s_mov_b64 s[10:11], s[8:9]
1288; GISEL-NEXT:    v_mov_b32_e32 v21, s11
1289; GISEL-NEXT:    v_mov_b32_e32 v20, s10
1290; GISEL-NEXT:    v_mov_b32_e32 v19, s9
1291; GISEL-NEXT:    v_mov_b32_e32 v18, s8
1292; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
1293; GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[6:7]
1294; GISEL-NEXT:    s_cbranch_execz .LBB1_5
1295; GISEL-NEXT:  ; %bb.2: ; %udiv-preheader4
1296; GISEL-NEXT:    v_add_i32_e32 v32, vcc, 0xffffffc0, v26
1297; GISEL-NEXT:    v_sub_i32_e32 v24, vcc, 64, v26
1298; GISEL-NEXT:    v_lshr_b64 v[18:19], v[16:17], v26
1299; GISEL-NEXT:    v_lshr_b64 v[20:21], v[0:1], v26
1300; GISEL-NEXT:    s_mov_b64 s[4:5], 0
1301; GISEL-NEXT:    v_add_i32_e32 v30, vcc, -1, v8
1302; GISEL-NEXT:    v_addc_u32_e32 v31, vcc, -1, v9, vcc
1303; GISEL-NEXT:    v_lshl_b64 v[24:25], v[16:17], v24
1304; GISEL-NEXT:    v_lshr_b64 v[16:17], v[16:17], v32
1305; GISEL-NEXT:    v_addc_u32_e32 v32, vcc, -1, v10, vcc
1306; GISEL-NEXT:    v_addc_u32_e32 v33, vcc, -1, v11, vcc
1307; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
1308; GISEL-NEXT:    v_or_b32_e32 v20, v20, v24
1309; GISEL-NEXT:    v_or_b32_e32 v21, v21, v25
1310; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v26
1311; GISEL-NEXT:    v_cndmask_b32_e32 v20, v16, v20, vcc
1312; GISEL-NEXT:    v_cndmask_b32_e32 v21, v17, v21, vcc
1313; GISEL-NEXT:    v_cndmask_b32_e32 v16, 0, v18, vcc
1314; GISEL-NEXT:    v_cndmask_b32_e32 v17, 0, v19, vcc
1315; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v26
1316; GISEL-NEXT:    v_cndmask_b32_e32 v24, v20, v0, vcc
1317; GISEL-NEXT:    v_cndmask_b32_e32 v25, v21, v1, vcc
1318; GISEL-NEXT:    v_mov_b32_e32 v1, 0
1319; GISEL-NEXT:    v_mov_b32_e32 v21, s7
1320; GISEL-NEXT:    v_mov_b32_e32 v20, s6
1321; GISEL-NEXT:    v_mov_b32_e32 v19, s5
1322; GISEL-NEXT:    v_mov_b32_e32 v18, s4
1323; GISEL-NEXT:  .LBB1_3: ; %udiv-do-while3
1324; GISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
1325; GISEL-NEXT:    v_lshrrev_b32_e32 v34, 31, v23
1326; GISEL-NEXT:    v_lshl_b64 v[20:21], v[22:23], 1
1327; GISEL-NEXT:    v_lshrrev_b32_e32 v0, 31, v25
1328; GISEL-NEXT:    v_lshl_b64 v[24:25], v[24:25], 1
1329; GISEL-NEXT:    v_lshl_b64 v[16:17], v[16:17], 1
1330; GISEL-NEXT:    v_lshrrev_b32_e32 v35, 31, v3
1331; GISEL-NEXT:    v_add_i32_e32 v26, vcc, -1, v26
1332; GISEL-NEXT:    v_addc_u32_e32 v27, vcc, -1, v27, vcc
1333; GISEL-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
1334; GISEL-NEXT:    v_or_b32_e32 v22, v18, v20
1335; GISEL-NEXT:    v_or_b32_e32 v23, v19, v21
1336; GISEL-NEXT:    v_or_b32_e32 v16, v16, v0
1337; GISEL-NEXT:    v_or_b32_e32 v20, v24, v35
1338; GISEL-NEXT:    v_addc_u32_e32 v28, vcc, -1, v28, vcc
1339; GISEL-NEXT:    v_addc_u32_e32 v29, vcc, -1, v29, vcc
1340; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v30, v20
1341; GISEL-NEXT:    v_subb_u32_e32 v0, vcc, v31, v25, vcc
1342; GISEL-NEXT:    v_or_b32_e32 v18, v26, v28
1343; GISEL-NEXT:    v_or_b32_e32 v19, v27, v29
1344; GISEL-NEXT:    v_subb_u32_e32 v0, vcc, v32, v16, vcc
1345; GISEL-NEXT:    v_subb_u32_e32 v0, vcc, v33, v17, vcc
1346; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[18:19]
1347; GISEL-NEXT:    v_ashrrev_i32_e32 v0, 31, v0
1348; GISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1349; GISEL-NEXT:    v_and_b32_e32 v18, v0, v8
1350; GISEL-NEXT:    v_and_b32_e32 v19, v0, v9
1351; GISEL-NEXT:    v_and_b32_e32 v21, v0, v10
1352; GISEL-NEXT:    v_and_b32_e32 v35, v0, v11
1353; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
1354; GISEL-NEXT:    v_sub_i32_e32 v24, vcc, v20, v18
1355; GISEL-NEXT:    v_subb_u32_e32 v25, vcc, v25, v19, vcc
1356; GISEL-NEXT:    v_subb_u32_e32 v16, vcc, v16, v21, vcc
1357; GISEL-NEXT:    v_subb_u32_e32 v17, vcc, v17, v35, vcc
1358; GISEL-NEXT:    v_or_b32_e32 v2, v2, v34
1359; GISEL-NEXT:    v_mov_b32_e32 v19, v1
1360; GISEL-NEXT:    v_mov_b32_e32 v18, v0
1361; GISEL-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1362; GISEL-NEXT:    s_cbranch_execnz .LBB1_3
1363; GISEL-NEXT:  ; %bb.4: ; %Flow13
1364; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
1365; GISEL-NEXT:  .LBB1_5: ; %Flow14
1366; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
1367; GISEL-NEXT:    v_lshl_b64 v[0:1], v[22:23], 1
1368; GISEL-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
1369; GISEL-NEXT:    v_lshrrev_b32_e32 v8, 31, v23
1370; GISEL-NEXT:    v_or_b32_e32 v2, v2, v8
1371; GISEL-NEXT:    v_or_b32_e32 v18, v18, v0
1372; GISEL-NEXT:    v_or_b32_e32 v19, v19, v1
1373; GISEL-NEXT:  .LBB1_6: ; %Flow16
1374; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
1375; GISEL-NEXT:    s_mov_b64 s[8:9], 0
1376; GISEL-NEXT:    v_or_b32_e32 v0, v12, v14
1377; GISEL-NEXT:    v_or_b32_e32 v1, v13, v15
1378; GISEL-NEXT:    v_or_b32_e32 v8, v4, v6
1379; GISEL-NEXT:    v_or_b32_e32 v9, v5, v7
1380; GISEL-NEXT:    v_ffbh_u32_e32 v16, v13
1381; GISEL-NEXT:    v_ffbh_u32_e32 v17, v12
1382; GISEL-NEXT:    v_ffbh_u32_e32 v20, v15
1383; GISEL-NEXT:    v_ffbh_u32_e32 v21, v14
1384; GISEL-NEXT:    v_ffbh_u32_e32 v22, v5
1385; GISEL-NEXT:    v_ffbh_u32_e32 v23, v4
1386; GISEL-NEXT:    v_ffbh_u32_e32 v24, v7
1387; GISEL-NEXT:    v_ffbh_u32_e32 v25, v6
1388; GISEL-NEXT:    v_mov_b32_e32 v10, 0x7f
1389; GISEL-NEXT:    v_mov_b32_e32 v11, 0
1390; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
1391; GISEL-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[8:9]
1392; GISEL-NEXT:    v_add_i32_e64 v0, s[6:7], 32, v17
1393; GISEL-NEXT:    v_add_i32_e64 v1, s[6:7], 32, v21
1394; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], 32, v23
1395; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], 32, v25
1396; GISEL-NEXT:    v_min_u32_e32 v0, v16, v0
1397; GISEL-NEXT:    v_min_u32_e32 v1, v20, v1
1398; GISEL-NEXT:    v_min_u32_e32 v8, v22, v8
1399; GISEL-NEXT:    v_min_u32_e32 v9, v24, v9
1400; GISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1401; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[4:5]
1402; GISEL-NEXT:    v_add_i32_e32 v0, vcc, 64, v0
1403; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 64, v8
1404; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
1405; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1406; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[6:7]
1407; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v8, vcc
1408; GISEL-NEXT:    v_sub_i32_e32 v16, vcc, v0, v1
1409; GISEL-NEXT:    v_subb_u32_e64 v17, s[4:5], 0, 0, vcc
1410; GISEL-NEXT:    v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
1411; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
1412; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[16:17], v[10:11]
1413; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1414; GISEL-NEXT:    v_xor_b32_e32 v8, 0x7f, v16
1415; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[0:1]
1416; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1417; GISEL-NEXT:    v_or_b32_e32 v8, v8, v0
1418; GISEL-NEXT:    v_or_b32_e32 v9, v17, v1
1419; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
1420; GISEL-NEXT:    v_cndmask_b32_e32 v10, v11, v10, vcc
1421; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[8:9]
1422; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1423; GISEL-NEXT:    v_or_b32_e32 v9, v20, v10
1424; GISEL-NEXT:    v_and_b32_e32 v10, 1, v9
1425; GISEL-NEXT:    v_or_b32_e32 v8, v9, v8
1426; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
1427; GISEL-NEXT:    v_cndmask_b32_e64 v10, v4, 0, vcc
1428; GISEL-NEXT:    v_and_b32_e32 v20, 1, v8
1429; GISEL-NEXT:    v_cndmask_b32_e64 v11, v5, 0, vcc
1430; GISEL-NEXT:    v_cndmask_b32_e64 v8, v6, 0, vcc
1431; GISEL-NEXT:    v_cndmask_b32_e64 v9, v7, 0, vcc
1432; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v20
1433; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
1434; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
1435; GISEL-NEXT:    s_cbranch_execz .LBB1_12
1436; GISEL-NEXT:  ; %bb.7: ; %udiv-bb1
1437; GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v16
1438; GISEL-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, v17, vcc
1439; GISEL-NEXT:    v_sub_i32_e32 v26, vcc, 0x7f, v16
1440; GISEL-NEXT:    v_not_b32_e32 v9, 63
1441; GISEL-NEXT:    v_addc_u32_e64 v24, vcc, 0, v0, s[4:5]
1442; GISEL-NEXT:    v_addc_u32_e32 v25, vcc, 0, v1, vcc
1443; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v26, v9
1444; GISEL-NEXT:    v_sub_i32_e64 v10, s[4:5], 64, v26
1445; GISEL-NEXT:    v_lshl_b64 v[0:1], v[4:5], v26
1446; GISEL-NEXT:    v_lshl_b64 v[16:17], v[6:7], v26
1447; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
1448; GISEL-NEXT:    v_lshr_b64 v[20:21], v[4:5], v10
1449; GISEL-NEXT:    v_lshl_b64 v[22:23], v[4:5], v9
1450; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v26
1451; GISEL-NEXT:    v_cndmask_b32_e32 v9, 0, v0, vcc
1452; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v1, vcc
1453; GISEL-NEXT:    v_or_b32_e32 v0, v20, v16
1454; GISEL-NEXT:    v_or_b32_e32 v1, v21, v17
1455; GISEL-NEXT:    v_cndmask_b32_e32 v0, v22, v0, vcc
1456; GISEL-NEXT:    v_cndmask_b32_e32 v1, v23, v1, vcc
1457; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v26
1458; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
1459; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
1460; GISEL-NEXT:    s_mov_b64 s[10:11], s[8:9]
1461; GISEL-NEXT:    v_mov_b32_e32 v23, s11
1462; GISEL-NEXT:    v_mov_b32_e32 v22, s10
1463; GISEL-NEXT:    v_mov_b32_e32 v21, s9
1464; GISEL-NEXT:    v_mov_b32_e32 v20, s8
1465; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
1466; GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[6:7]
1467; GISEL-NEXT:    s_cbranch_execz .LBB1_11
1468; GISEL-NEXT:  ; %bb.8: ; %udiv-preheader
1469; GISEL-NEXT:    v_add_i32_e32 v28, vcc, 0xffffffc0, v8
1470; GISEL-NEXT:    v_sub_i32_e32 v22, vcc, 64, v8
1471; GISEL-NEXT:    v_lshr_b64 v[16:17], v[6:7], v8
1472; GISEL-NEXT:    v_lshr_b64 v[20:21], v[4:5], v8
1473; GISEL-NEXT:    s_mov_b64 s[4:5], 0
1474; GISEL-NEXT:    v_add_i32_e32 v26, vcc, -1, v12
1475; GISEL-NEXT:    v_addc_u32_e32 v27, vcc, -1, v13, vcc
1476; GISEL-NEXT:    v_lshl_b64 v[22:23], v[6:7], v22
1477; GISEL-NEXT:    v_lshr_b64 v[6:7], v[6:7], v28
1478; GISEL-NEXT:    v_addc_u32_e32 v28, vcc, -1, v14, vcc
1479; GISEL-NEXT:    v_addc_u32_e32 v29, vcc, -1, v15, vcc
1480; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
1481; GISEL-NEXT:    v_or_b32_e32 v20, v20, v22
1482; GISEL-NEXT:    v_or_b32_e32 v21, v21, v23
1483; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v8
1484; GISEL-NEXT:    v_cndmask_b32_e32 v6, v6, v20, vcc
1485; GISEL-NEXT:    v_cndmask_b32_e32 v7, v7, v21, vcc
1486; GISEL-NEXT:    v_cndmask_b32_e32 v16, 0, v16, vcc
1487; GISEL-NEXT:    v_cndmask_b32_e32 v17, 0, v17, vcc
1488; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v8
1489; GISEL-NEXT:    v_cndmask_b32_e32 v6, v6, v4, vcc
1490; GISEL-NEXT:    v_cndmask_b32_e32 v7, v7, v5, vcc
1491; GISEL-NEXT:    v_mov_b32_e32 v5, 0
1492; GISEL-NEXT:    v_mov_b32_e32 v23, s7
1493; GISEL-NEXT:    v_mov_b32_e32 v22, s6
1494; GISEL-NEXT:    v_mov_b32_e32 v21, s5
1495; GISEL-NEXT:    v_mov_b32_e32 v20, s4
1496; GISEL-NEXT:  .LBB1_9: ; %udiv-do-while
1497; GISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
1498; GISEL-NEXT:    v_lshl_b64 v[22:23], v[6:7], 1
1499; GISEL-NEXT:    v_lshl_b64 v[16:17], v[16:17], 1
1500; GISEL-NEXT:    v_lshrrev_b32_e32 v4, 31, v7
1501; GISEL-NEXT:    v_lshrrev_b32_e32 v30, 31, v1
1502; GISEL-NEXT:    v_lshl_b64 v[6:7], v[9:10], 1
1503; GISEL-NEXT:    v_lshl_b64 v[0:1], v[0:1], 1
1504; GISEL-NEXT:    v_lshrrev_b32_e32 v9, 31, v10
1505; GISEL-NEXT:    v_add_i32_e32 v8, vcc, -1, v8
1506; GISEL-NEXT:    v_addc_u32_e32 v11, vcc, -1, v11, vcc
1507; GISEL-NEXT:    v_or_b32_e32 v16, v16, v4
1508; GISEL-NEXT:    v_or_b32_e32 v22, v22, v30
1509; GISEL-NEXT:    v_or_b32_e32 v0, v0, v9
1510; GISEL-NEXT:    v_or_b32_e32 v9, v20, v6
1511; GISEL-NEXT:    v_or_b32_e32 v10, v21, v7
1512; GISEL-NEXT:    v_addc_u32_e32 v24, vcc, -1, v24, vcc
1513; GISEL-NEXT:    v_addc_u32_e32 v25, vcc, -1, v25, vcc
1514; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v26, v22
1515; GISEL-NEXT:    v_subb_u32_e32 v4, vcc, v27, v23, vcc
1516; GISEL-NEXT:    v_or_b32_e32 v6, v8, v24
1517; GISEL-NEXT:    v_or_b32_e32 v7, v11, v25
1518; GISEL-NEXT:    v_subb_u32_e32 v4, vcc, v28, v16, vcc
1519; GISEL-NEXT:    v_subb_u32_e32 v4, vcc, v29, v17, vcc
1520; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[6:7]
1521; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v4
1522; GISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1523; GISEL-NEXT:    v_and_b32_e32 v4, 1, v6
1524; GISEL-NEXT:    v_and_b32_e32 v7, v6, v12
1525; GISEL-NEXT:    v_and_b32_e32 v30, v6, v13
1526; GISEL-NEXT:    v_and_b32_e32 v31, v6, v14
1527; GISEL-NEXT:    v_and_b32_e32 v32, v6, v15
1528; GISEL-NEXT:    v_mov_b32_e32 v21, v5
1529; GISEL-NEXT:    v_mov_b32_e32 v20, v4
1530; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v22, v7
1531; GISEL-NEXT:    v_subb_u32_e32 v7, vcc, v23, v30, vcc
1532; GISEL-NEXT:    v_subb_u32_e32 v16, vcc, v16, v31, vcc
1533; GISEL-NEXT:    v_subb_u32_e32 v17, vcc, v17, v32, vcc
1534; GISEL-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1535; GISEL-NEXT:    s_cbranch_execnz .LBB1_9
1536; GISEL-NEXT:  ; %bb.10: ; %Flow
1537; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
1538; GISEL-NEXT:  .LBB1_11: ; %Flow11
1539; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
1540; GISEL-NEXT:    v_lshl_b64 v[4:5], v[9:10], 1
1541; GISEL-NEXT:    v_lshl_b64 v[8:9], v[0:1], 1
1542; GISEL-NEXT:    v_lshrrev_b32_e32 v0, 31, v10
1543; GISEL-NEXT:    v_or_b32_e32 v8, v8, v0
1544; GISEL-NEXT:    v_or_b32_e32 v10, v20, v4
1545; GISEL-NEXT:    v_or_b32_e32 v11, v21, v5
1546; GISEL-NEXT:  .LBB1_12: ; %Flow12
1547; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
1548; GISEL-NEXT:    v_mov_b32_e32 v0, v18
1549; GISEL-NEXT:    v_mov_b32_e32 v1, v19
1550; GISEL-NEXT:    v_mov_b32_e32 v4, v10
1551; GISEL-NEXT:    v_mov_b32_e32 v5, v11
1552; GISEL-NEXT:    v_mov_b32_e32 v6, v8
1553; GISEL-NEXT:    v_mov_b32_e32 v7, v9
1554; GISEL-NEXT:    s_setpc_b64 s[30:31]
1555  %shl = udiv <2 x i128> %lhs, %rhs
1556  ret <2 x i128> %shl
1557}
1558
1559define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
1560; SDAG-LABEL: v_srem_v2i128_vv:
1561; SDAG:       ; %bb.0: ; %_udiv-special-cases_udiv-special-cases
1562; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1563; SDAG-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1564; SDAG-NEXT:    v_sub_i32_e32 v16, vcc, 0, v0
1565; SDAG-NEXT:    v_mov_b32_e32 v19, 0
1566; SDAG-NEXT:    v_ashrrev_i32_e32 v28, 31, v3
1567; SDAG-NEXT:    s_mov_b64 s[10:11], 0x7f
1568; SDAG-NEXT:    v_subb_u32_e32 v17, vcc, 0, v1, vcc
1569; SDAG-NEXT:    v_mov_b32_e32 v29, v28
1570; SDAG-NEXT:    v_subb_u32_e32 v18, vcc, 0, v2, vcc
1571; SDAG-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
1572; SDAG-NEXT:    v_cndmask_b32_e64 v17, v1, v17, s[4:5]
1573; SDAG-NEXT:    v_cndmask_b32_e64 v16, v0, v16, s[4:5]
1574; SDAG-NEXT:    v_subb_u32_e32 v1, vcc, 0, v3, vcc
1575; SDAG-NEXT:    v_cndmask_b32_e64 v0, v2, v18, s[4:5]
1576; SDAG-NEXT:    v_ffbh_u32_e32 v18, v16
1577; SDAG-NEXT:    v_ffbh_u32_e32 v20, v17
1578; SDAG-NEXT:    v_sub_i32_e32 v21, vcc, 0, v8
1579; SDAG-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[4:5]
1580; SDAG-NEXT:    v_or_b32_e32 v2, v16, v0
1581; SDAG-NEXT:    v_add_i32_e64 v18, s[4:5], 32, v18
1582; SDAG-NEXT:    v_ffbh_u32_e32 v22, v0
1583; SDAG-NEXT:    v_subb_u32_e32 v23, vcc, 0, v9, vcc
1584; SDAG-NEXT:    v_or_b32_e32 v3, v17, v1
1585; SDAG-NEXT:    v_min_u32_e32 v18, v18, v20
1586; SDAG-NEXT:    v_add_i32_e64 v20, s[4:5], 32, v22
1587; SDAG-NEXT:    v_ffbh_u32_e32 v22, v1
1588; SDAG-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[10:11]
1589; SDAG-NEXT:    v_cndmask_b32_e64 v30, v9, v23, s[4:5]
1590; SDAG-NEXT:    v_subb_u32_e32 v9, vcc, 0, v10, vcc
1591; SDAG-NEXT:    v_cndmask_b32_e64 v31, v8, v21, s[4:5]
1592; SDAG-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[2:3]
1593; SDAG-NEXT:    v_min_u32_e32 v3, v20, v22
1594; SDAG-NEXT:    v_add_i32_e64 v8, s[8:9], 64, v18
1595; SDAG-NEXT:    v_addc_u32_e64 v18, s[8:9], 0, 0, s[8:9]
1596; SDAG-NEXT:    v_subb_u32_e32 v20, vcc, 0, v11, vcc
1597; SDAG-NEXT:    v_cndmask_b32_e64 v2, v10, v9, s[4:5]
1598; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
1599; SDAG-NEXT:    v_cndmask_b32_e64 v18, v18, 0, vcc
1600; SDAG-NEXT:    v_cndmask_b32_e32 v10, v8, v3, vcc
1601; SDAG-NEXT:    v_ffbh_u32_e32 v9, v31
1602; SDAG-NEXT:    v_ffbh_u32_e32 v21, v30
1603; SDAG-NEXT:    v_cndmask_b32_e64 v3, v11, v20, s[4:5]
1604; SDAG-NEXT:    v_or_b32_e32 v8, v31, v2
1605; SDAG-NEXT:    v_add_i32_e32 v11, vcc, 32, v9
1606; SDAG-NEXT:    v_ffbh_u32_e32 v20, v2
1607; SDAG-NEXT:    v_or_b32_e32 v9, v30, v3
1608; SDAG-NEXT:    v_min_u32_e32 v11, v11, v21
1609; SDAG-NEXT:    v_add_i32_e32 v20, vcc, 32, v20
1610; SDAG-NEXT:    v_ffbh_u32_e32 v21, v3
1611; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[8:9]
1612; SDAG-NEXT:    v_min_u32_e32 v8, v20, v21
1613; SDAG-NEXT:    v_add_i32_e64 v9, s[4:5], 64, v11
1614; SDAG-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5]
1615; SDAG-NEXT:    v_cmp_ne_u64_e64 s[4:5], 0, v[2:3]
1616; SDAG-NEXT:    v_cndmask_b32_e64 v11, v11, 0, s[4:5]
1617; SDAG-NEXT:    v_cndmask_b32_e64 v8, v9, v8, s[4:5]
1618; SDAG-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
1619; SDAG-NEXT:    v_sub_i32_e32 v10, vcc, v8, v10
1620; SDAG-NEXT:    v_subb_u32_e32 v11, vcc, v11, v18, vcc
1621; SDAG-NEXT:    v_xor_b32_e32 v8, 0x7f, v10
1622; SDAG-NEXT:    v_subbrev_u32_e32 v18, vcc, 0, v19, vcc
1623; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[10:11], v[10:11]
1624; SDAG-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[4:5]
1625; SDAG-NEXT:    v_subbrev_u32_e32 v19, vcc, 0, v19, vcc
1626; SDAG-NEXT:    v_or_b32_e32 v8, v8, v18
1627; SDAG-NEXT:    v_or_b32_e32 v9, v11, v19
1628; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[18:19]
1629; SDAG-NEXT:    v_cndmask_b32_e64 v21, 0, 1, vcc
1630; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[8:9]
1631; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[18:19]
1632; SDAG-NEXT:    v_cndmask_b32_e64 v8, v21, v20, s[4:5]
1633; SDAG-NEXT:    v_and_b32_e32 v8, 1, v8
1634; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 1, v8
1635; SDAG-NEXT:    s_or_b64 s[4:5], s[6:7], s[4:5]
1636; SDAG-NEXT:    v_cndmask_b32_e64 v35, v1, 0, s[4:5]
1637; SDAG-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
1638; SDAG-NEXT:    v_cndmask_b32_e64 v32, v0, 0, s[4:5]
1639; SDAG-NEXT:    v_cndmask_b32_e64 v27, v17, 0, s[4:5]
1640; SDAG-NEXT:    s_and_b64 s[8:9], s[6:7], vcc
1641; SDAG-NEXT:    v_cndmask_b32_e64 v33, v16, 0, s[4:5]
1642; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[8:9]
1643; SDAG-NEXT:    s_cbranch_execz .LBB2_6
1644; SDAG-NEXT:  ; %bb.1: ; %udiv-bb15
1645; SDAG-NEXT:    v_add_i32_e32 v32, vcc, 1, v10
1646; SDAG-NEXT:    v_sub_i32_e64 v20, s[4:5], 63, v10
1647; SDAG-NEXT:    v_mov_b32_e32 v8, 0
1648; SDAG-NEXT:    v_mov_b32_e32 v9, 0
1649; SDAG-NEXT:    v_addc_u32_e32 v33, vcc, 0, v11, vcc
1650; SDAG-NEXT:    v_lshl_b64 v[20:21], v[16:17], v20
1651; SDAG-NEXT:    v_addc_u32_e32 v34, vcc, 0, v18, vcc
1652; SDAG-NEXT:    v_addc_u32_e32 v35, vcc, 0, v19, vcc
1653; SDAG-NEXT:    v_or_b32_e32 v18, v32, v34
1654; SDAG-NEXT:    v_sub_i32_e32 v24, vcc, 0x7f, v10
1655; SDAG-NEXT:    v_or_b32_e32 v19, v33, v35
1656; SDAG-NEXT:    v_lshl_b64 v[10:11], v[0:1], v24
1657; SDAG-NEXT:    v_sub_i32_e32 v25, vcc, 64, v24
1658; SDAG-NEXT:    v_lshl_b64 v[22:23], v[16:17], v24
1659; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[18:19]
1660; SDAG-NEXT:    v_lshr_b64 v[18:19], v[16:17], v25
1661; SDAG-NEXT:    v_or_b32_e32 v11, v11, v19
1662; SDAG-NEXT:    v_or_b32_e32 v10, v10, v18
1663; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v24
1664; SDAG-NEXT:    v_cndmask_b32_e64 v11, v21, v11, s[4:5]
1665; SDAG-NEXT:    v_cndmask_b32_e64 v10, v20, v10, s[4:5]
1666; SDAG-NEXT:    v_cndmask_b32_e64 v21, 0, v23, s[4:5]
1667; SDAG-NEXT:    v_cndmask_b32_e64 v20, 0, v22, s[4:5]
1668; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v24
1669; SDAG-NEXT:    v_cndmask_b32_e64 v11, v11, v1, s[4:5]
1670; SDAG-NEXT:    v_cndmask_b32_e64 v10, v10, v0, s[4:5]
1671; SDAG-NEXT:    v_mov_b32_e32 v18, 0
1672; SDAG-NEXT:    v_mov_b32_e32 v19, 0
1673; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1674; SDAG-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
1675; SDAG-NEXT:    s_cbranch_execz .LBB2_5
1676; SDAG-NEXT:  ; %bb.2: ; %udiv-preheader4
1677; SDAG-NEXT:    v_lshr_b64 v[8:9], v[16:17], v32
1678; SDAG-NEXT:    v_sub_i32_e32 v26, vcc, 64, v32
1679; SDAG-NEXT:    v_subrev_i32_e32 v37, vcc, 64, v32
1680; SDAG-NEXT:    v_lshr_b64 v[24:25], v[0:1], v32
1681; SDAG-NEXT:    v_add_i32_e32 v36, vcc, -1, v31
1682; SDAG-NEXT:    s_mov_b64 s[10:11], 0
1683; SDAG-NEXT:    v_mov_b32_e32 v22, 0
1684; SDAG-NEXT:    v_mov_b32_e32 v23, 0
1685; SDAG-NEXT:    v_mov_b32_e32 v18, 0
1686; SDAG-NEXT:    v_mov_b32_e32 v19, 0
1687; SDAG-NEXT:    v_lshl_b64 v[26:27], v[0:1], v26
1688; SDAG-NEXT:    v_lshr_b64 v[48:49], v[0:1], v37
1689; SDAG-NEXT:    v_addc_u32_e32 v37, vcc, -1, v30, vcc
1690; SDAG-NEXT:    v_or_b32_e32 v9, v9, v27
1691; SDAG-NEXT:    v_or_b32_e32 v8, v8, v26
1692; SDAG-NEXT:    v_addc_u32_e32 v38, vcc, -1, v2, vcc
1693; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v32
1694; SDAG-NEXT:    v_cndmask_b32_e64 v9, v49, v9, s[4:5]
1695; SDAG-NEXT:    v_cndmask_b32_e64 v8, v48, v8, s[4:5]
1696; SDAG-NEXT:    v_cndmask_b32_e64 v27, 0, v25, s[4:5]
1697; SDAG-NEXT:    v_cndmask_b32_e64 v26, 0, v24, s[4:5]
1698; SDAG-NEXT:    v_addc_u32_e32 v39, vcc, -1, v3, vcc
1699; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v32
1700; SDAG-NEXT:    v_cndmask_b32_e32 v25, v9, v17, vcc
1701; SDAG-NEXT:    v_cndmask_b32_e32 v24, v8, v16, vcc
1702; SDAG-NEXT:    v_mov_b32_e32 v9, 0
1703; SDAG-NEXT:  .LBB2_3: ; %udiv-do-while3
1704; SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
1705; SDAG-NEXT:    v_lshrrev_b32_e32 v8, 31, v21
1706; SDAG-NEXT:    v_lshl_b64 v[20:21], v[20:21], 1
1707; SDAG-NEXT:    v_lshl_b64 v[26:27], v[26:27], 1
1708; SDAG-NEXT:    v_lshrrev_b32_e32 v48, 31, v25
1709; SDAG-NEXT:    v_lshl_b64 v[24:25], v[24:25], 1
1710; SDAG-NEXT:    v_lshrrev_b32_e32 v49, 31, v11
1711; SDAG-NEXT:    v_lshl_b64 v[10:11], v[10:11], 1
1712; SDAG-NEXT:    v_or_b32_e32 v21, v23, v21
1713; SDAG-NEXT:    v_or_b32_e32 v20, v22, v20
1714; SDAG-NEXT:    v_or_b32_e32 v22, v26, v48
1715; SDAG-NEXT:    v_or_b32_e32 v23, v24, v49
1716; SDAG-NEXT:    v_or_b32_e32 v10, v10, v8
1717; SDAG-NEXT:    v_sub_i32_e32 v8, vcc, v36, v23
1718; SDAG-NEXT:    v_subb_u32_e32 v8, vcc, v37, v25, vcc
1719; SDAG-NEXT:    v_subb_u32_e32 v8, vcc, v38, v22, vcc
1720; SDAG-NEXT:    v_subb_u32_e32 v8, vcc, v39, v27, vcc
1721; SDAG-NEXT:    v_ashrrev_i32_e32 v8, 31, v8
1722; SDAG-NEXT:    v_and_b32_e32 v24, v8, v31
1723; SDAG-NEXT:    v_and_b32_e32 v26, v8, v30
1724; SDAG-NEXT:    v_and_b32_e32 v48, v8, v2
1725; SDAG-NEXT:    v_and_b32_e32 v49, v8, v3
1726; SDAG-NEXT:    v_and_b32_e32 v8, 1, v8
1727; SDAG-NEXT:    v_sub_i32_e32 v24, vcc, v23, v24
1728; SDAG-NEXT:    v_subb_u32_e32 v25, vcc, v25, v26, vcc
1729; SDAG-NEXT:    v_subb_u32_e32 v26, vcc, v22, v48, vcc
1730; SDAG-NEXT:    v_subb_u32_e32 v27, vcc, v27, v49, vcc
1731; SDAG-NEXT:    v_add_i32_e32 v32, vcc, -1, v32
1732; SDAG-NEXT:    v_addc_u32_e32 v33, vcc, -1, v33, vcc
1733; SDAG-NEXT:    v_addc_u32_e32 v34, vcc, -1, v34, vcc
1734; SDAG-NEXT:    v_addc_u32_e32 v35, vcc, -1, v35, vcc
1735; SDAG-NEXT:    v_or_b32_e32 v22, v32, v34
1736; SDAG-NEXT:    v_or_b32_e32 v23, v33, v35
1737; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[22:23]
1738; SDAG-NEXT:    v_or_b32_e32 v11, v19, v11
1739; SDAG-NEXT:    s_or_b64 s[10:11], vcc, s[10:11]
1740; SDAG-NEXT:    v_or_b32_e32 v10, v18, v10
1741; SDAG-NEXT:    v_mov_b32_e32 v23, v9
1742; SDAG-NEXT:    v_mov_b32_e32 v22, v8
1743; SDAG-NEXT:    s_andn2_b64 exec, exec, s[10:11]
1744; SDAG-NEXT:    s_cbranch_execnz .LBB2_3
1745; SDAG-NEXT:  ; %bb.4: ; %Flow13
1746; SDAG-NEXT:    s_or_b64 exec, exec, s[10:11]
1747; SDAG-NEXT:  .LBB2_5: ; %Flow14
1748; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
1749; SDAG-NEXT:    v_lshl_b64 v[10:11], v[10:11], 1
1750; SDAG-NEXT:    v_lshrrev_b32_e32 v22, 31, v21
1751; SDAG-NEXT:    v_or_b32_e32 v10, v10, v22
1752; SDAG-NEXT:    v_lshl_b64 v[20:21], v[20:21], 1
1753; SDAG-NEXT:    v_or_b32_e32 v35, v19, v11
1754; SDAG-NEXT:    v_or_b32_e32 v32, v18, v10
1755; SDAG-NEXT:    v_or_b32_e32 v27, v9, v21
1756; SDAG-NEXT:    v_or_b32_e32 v33, v8, v20
1757; SDAG-NEXT:  .LBB2_6: ; %Flow16
1758; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
1759; SDAG-NEXT:    v_ashrrev_i32_e32 v26, 31, v7
1760; SDAG-NEXT:    v_sub_i32_e32 v8, vcc, 0, v4
1761; SDAG-NEXT:    v_mov_b32_e32 v18, 0
1762; SDAG-NEXT:    s_mov_b64 s[10:11], 0x7f
1763; SDAG-NEXT:    v_mov_b32_e32 v34, v26
1764; SDAG-NEXT:    v_subb_u32_e32 v9, vcc, 0, v5, vcc
1765; SDAG-NEXT:    v_subb_u32_e32 v10, vcc, 0, v6, vcc
1766; SDAG-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[6:7]
1767; SDAG-NEXT:    v_cndmask_b32_e64 v9, v5, v9, s[4:5]
1768; SDAG-NEXT:    v_cndmask_b32_e64 v8, v4, v8, s[4:5]
1769; SDAG-NEXT:    v_subb_u32_e32 v5, vcc, 0, v7, vcc
1770; SDAG-NEXT:    v_cndmask_b32_e64 v4, v6, v10, s[4:5]
1771; SDAG-NEXT:    v_ffbh_u32_e32 v10, v8
1772; SDAG-NEXT:    v_ffbh_u32_e32 v11, v9
1773; SDAG-NEXT:    v_cndmask_b32_e64 v5, v7, v5, s[4:5]
1774; SDAG-NEXT:    v_sub_i32_e32 v19, vcc, 0, v12
1775; SDAG-NEXT:    v_or_b32_e32 v6, v8, v4
1776; SDAG-NEXT:    v_ffbh_u32_e32 v20, v4
1777; SDAG-NEXT:    v_add_i32_e64 v10, s[4:5], 32, v10
1778; SDAG-NEXT:    v_subb_u32_e32 v21, vcc, 0, v13, vcc
1779; SDAG-NEXT:    v_or_b32_e32 v7, v9, v5
1780; SDAG-NEXT:    v_add_i32_e64 v20, s[4:5], 32, v20
1781; SDAG-NEXT:    v_ffbh_u32_e32 v22, v5
1782; SDAG-NEXT:    v_min_u32_e32 v10, v10, v11
1783; SDAG-NEXT:    v_subb_u32_e32 v11, vcc, 0, v14, vcc
1784; SDAG-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[14:15]
1785; SDAG-NEXT:    v_cndmask_b32_e64 v36, v13, v21, s[4:5]
1786; SDAG-NEXT:    v_cndmask_b32_e64 v37, v12, v19, s[4:5]
1787; SDAG-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[6:7]
1788; SDAG-NEXT:    v_min_u32_e32 v7, v20, v22
1789; SDAG-NEXT:    v_add_i32_e64 v10, s[8:9], 64, v10
1790; SDAG-NEXT:    v_addc_u32_e64 v12, s[8:9], 0, 0, s[8:9]
1791; SDAG-NEXT:    v_subb_u32_e32 v13, vcc, 0, v15, vcc
1792; SDAG-NEXT:    v_cndmask_b32_e64 v6, v14, v11, s[4:5]
1793; SDAG-NEXT:    v_ffbh_u32_e32 v11, v37
1794; SDAG-NEXT:    v_ffbh_u32_e32 v14, v36
1795; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
1796; SDAG-NEXT:    v_cndmask_b32_e64 v12, v12, 0, vcc
1797; SDAG-NEXT:    v_cndmask_b32_e32 v19, v10, v7, vcc
1798; SDAG-NEXT:    v_cndmask_b32_e64 v7, v15, v13, s[4:5]
1799; SDAG-NEXT:    v_or_b32_e32 v10, v37, v6
1800; SDAG-NEXT:    v_ffbh_u32_e32 v13, v6
1801; SDAG-NEXT:    v_add_i32_e32 v15, vcc, 32, v11
1802; SDAG-NEXT:    v_or_b32_e32 v11, v36, v7
1803; SDAG-NEXT:    v_add_i32_e32 v13, vcc, 32, v13
1804; SDAG-NEXT:    v_ffbh_u32_e32 v20, v7
1805; SDAG-NEXT:    v_min_u32_e32 v14, v15, v14
1806; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
1807; SDAG-NEXT:    v_min_u32_e32 v10, v13, v20
1808; SDAG-NEXT:    v_add_i32_e64 v11, s[4:5], 64, v14
1809; SDAG-NEXT:    v_addc_u32_e64 v13, s[4:5], 0, 0, s[4:5]
1810; SDAG-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
1811; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
1812; SDAG-NEXT:    v_cndmask_b32_e64 v13, v13, 0, vcc
1813; SDAG-NEXT:    v_cndmask_b32_e32 v10, v11, v10, vcc
1814; SDAG-NEXT:    v_sub_i32_e32 v10, vcc, v10, v19
1815; SDAG-NEXT:    v_subb_u32_e32 v11, vcc, v13, v12, vcc
1816; SDAG-NEXT:    v_xor_b32_e32 v14, 0x7f, v10
1817; SDAG-NEXT:    v_subbrev_u32_e32 v12, vcc, 0, v18, vcc
1818; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[10:11], v[10:11]
1819; SDAG-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[4:5]
1820; SDAG-NEXT:    v_subbrev_u32_e32 v13, vcc, 0, v18, vcc
1821; SDAG-NEXT:    v_or_b32_e32 v14, v14, v12
1822; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[12:13]
1823; SDAG-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
1824; SDAG-NEXT:    v_or_b32_e32 v15, v11, v13
1825; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[12:13]
1826; SDAG-NEXT:    v_cndmask_b32_e32 v18, v18, v19, vcc
1827; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[14:15]
1828; SDAG-NEXT:    v_and_b32_e32 v14, 1, v18
1829; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 1, v14
1830; SDAG-NEXT:    s_or_b64 s[4:5], s[6:7], s[4:5]
1831; SDAG-NEXT:    v_cndmask_b32_e64 v19, v5, 0, s[4:5]
1832; SDAG-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
1833; SDAG-NEXT:    v_cndmask_b32_e64 v18, v4, 0, s[4:5]
1834; SDAG-NEXT:    v_cndmask_b32_e64 v15, v9, 0, s[4:5]
1835; SDAG-NEXT:    v_cndmask_b32_e64 v14, v8, 0, s[4:5]
1836; SDAG-NEXT:    s_and_b64 s[4:5], s[6:7], vcc
1837; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
1838; SDAG-NEXT:    s_cbranch_execz .LBB2_12
1839; SDAG-NEXT:  ; %bb.7: ; %udiv-bb1
1840; SDAG-NEXT:    v_add_i32_e32 v38, vcc, 1, v10
1841; SDAG-NEXT:    v_sub_i32_e64 v18, s[4:5], 63, v10
1842; SDAG-NEXT:    v_mov_b32_e32 v14, 0
1843; SDAG-NEXT:    v_mov_b32_e32 v15, 0
1844; SDAG-NEXT:    v_addc_u32_e32 v39, vcc, 0, v11, vcc
1845; SDAG-NEXT:    v_lshl_b64 v[18:19], v[8:9], v18
1846; SDAG-NEXT:    v_addc_u32_e32 v48, vcc, 0, v12, vcc
1847; SDAG-NEXT:    v_addc_u32_e32 v49, vcc, 0, v13, vcc
1848; SDAG-NEXT:    v_or_b32_e32 v11, v38, v48
1849; SDAG-NEXT:    v_sub_i32_e32 v13, vcc, 0x7f, v10
1850; SDAG-NEXT:    v_or_b32_e32 v12, v39, v49
1851; SDAG-NEXT:    v_lshl_b64 v[20:21], v[4:5], v13
1852; SDAG-NEXT:    v_sub_i32_e32 v10, vcc, 64, v13
1853; SDAG-NEXT:    v_lshl_b64 v[22:23], v[8:9], v13
1854; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[11:12]
1855; SDAG-NEXT:    v_lshr_b64 v[10:11], v[8:9], v10
1856; SDAG-NEXT:    v_or_b32_e32 v11, v21, v11
1857; SDAG-NEXT:    v_or_b32_e32 v10, v20, v10
1858; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v13
1859; SDAG-NEXT:    v_cndmask_b32_e64 v12, v19, v11, s[4:5]
1860; SDAG-NEXT:    v_cndmask_b32_e64 v18, v18, v10, s[4:5]
1861; SDAG-NEXT:    v_cndmask_b32_e64 v11, 0, v23, s[4:5]
1862; SDAG-NEXT:    v_cndmask_b32_e64 v10, 0, v22, s[4:5]
1863; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v13
1864; SDAG-NEXT:    v_cndmask_b32_e64 v13, v12, v5, s[4:5]
1865; SDAG-NEXT:    v_cndmask_b32_e64 v12, v18, v4, s[4:5]
1866; SDAG-NEXT:    v_mov_b32_e32 v18, 0
1867; SDAG-NEXT:    v_mov_b32_e32 v19, 0
1868; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1869; SDAG-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
1870; SDAG-NEXT:    s_cbranch_execz .LBB2_11
1871; SDAG-NEXT:  ; %bb.8: ; %udiv-preheader
1872; SDAG-NEXT:    v_lshr_b64 v[14:15], v[8:9], v38
1873; SDAG-NEXT:    v_sub_i32_e32 v24, vcc, 64, v38
1874; SDAG-NEXT:    v_subrev_i32_e32 v51, vcc, 64, v38
1875; SDAG-NEXT:    v_lshr_b64 v[22:23], v[4:5], v38
1876; SDAG-NEXT:    v_add_i32_e32 v50, vcc, -1, v37
1877; SDAG-NEXT:    s_mov_b64 s[10:11], 0
1878; SDAG-NEXT:    v_mov_b32_e32 v20, 0
1879; SDAG-NEXT:    v_mov_b32_e32 v21, 0
1880; SDAG-NEXT:    v_mov_b32_e32 v18, 0
1881; SDAG-NEXT:    v_mov_b32_e32 v19, 0
1882; SDAG-NEXT:    v_lshl_b64 v[24:25], v[4:5], v24
1883; SDAG-NEXT:    v_lshr_b64 v[53:54], v[4:5], v51
1884; SDAG-NEXT:    v_addc_u32_e32 v51, vcc, -1, v36, vcc
1885; SDAG-NEXT:    v_or_b32_e32 v15, v15, v25
1886; SDAG-NEXT:    v_or_b32_e32 v14, v14, v24
1887; SDAG-NEXT:    v_addc_u32_e32 v52, vcc, -1, v6, vcc
1888; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v38
1889; SDAG-NEXT:    v_cndmask_b32_e64 v15, v54, v15, s[4:5]
1890; SDAG-NEXT:    v_cndmask_b32_e64 v14, v53, v14, s[4:5]
1891; SDAG-NEXT:    v_cndmask_b32_e64 v25, 0, v23, s[4:5]
1892; SDAG-NEXT:    v_cndmask_b32_e64 v24, 0, v22, s[4:5]
1893; SDAG-NEXT:    v_addc_u32_e32 v53, vcc, -1, v7, vcc
1894; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v38
1895; SDAG-NEXT:    v_cndmask_b32_e32 v23, v15, v9, vcc
1896; SDAG-NEXT:    v_cndmask_b32_e32 v22, v14, v8, vcc
1897; SDAG-NEXT:    v_mov_b32_e32 v15, 0
1898; SDAG-NEXT:  .LBB2_9: ; %udiv-do-while
1899; SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
1900; SDAG-NEXT:    v_lshl_b64 v[24:25], v[24:25], 1
1901; SDAG-NEXT:    v_lshrrev_b32_e32 v14, 31, v23
1902; SDAG-NEXT:    v_lshl_b64 v[22:23], v[22:23], 1
1903; SDAG-NEXT:    v_lshrrev_b32_e32 v54, 31, v13
1904; SDAG-NEXT:    v_lshl_b64 v[12:13], v[12:13], 1
1905; SDAG-NEXT:    v_lshrrev_b32_e32 v55, 31, v11
1906; SDAG-NEXT:    v_lshl_b64 v[10:11], v[10:11], 1
1907; SDAG-NEXT:    v_or_b32_e32 v24, v24, v14
1908; SDAG-NEXT:    v_or_b32_e32 v22, v22, v54
1909; SDAG-NEXT:    v_or_b32_e32 v12, v12, v55
1910; SDAG-NEXT:    v_or_b32_e32 v13, v19, v13
1911; SDAG-NEXT:    v_or_b32_e32 v11, v21, v11
1912; SDAG-NEXT:    v_or_b32_e32 v12, v18, v12
1913; SDAG-NEXT:    v_sub_i32_e32 v14, vcc, v50, v22
1914; SDAG-NEXT:    v_subb_u32_e32 v14, vcc, v51, v23, vcc
1915; SDAG-NEXT:    v_subb_u32_e32 v14, vcc, v52, v24, vcc
1916; SDAG-NEXT:    v_subb_u32_e32 v14, vcc, v53, v25, vcc
1917; SDAG-NEXT:    v_ashrrev_i32_e32 v21, 31, v14
1918; SDAG-NEXT:    v_and_b32_e32 v14, 1, v21
1919; SDAG-NEXT:    v_and_b32_e32 v54, v21, v7
1920; SDAG-NEXT:    v_and_b32_e32 v55, v21, v6
1921; SDAG-NEXT:    v_and_b32_e32 v40, v21, v36
1922; SDAG-NEXT:    v_and_b32_e32 v21, v21, v37
1923; SDAG-NEXT:    v_sub_i32_e32 v22, vcc, v22, v21
1924; SDAG-NEXT:    v_subb_u32_e32 v23, vcc, v23, v40, vcc
1925; SDAG-NEXT:    v_subb_u32_e32 v24, vcc, v24, v55, vcc
1926; SDAG-NEXT:    v_subb_u32_e32 v25, vcc, v25, v54, vcc
1927; SDAG-NEXT:    v_add_i32_e32 v38, vcc, -1, v38
1928; SDAG-NEXT:    v_addc_u32_e32 v39, vcc, -1, v39, vcc
1929; SDAG-NEXT:    v_addc_u32_e32 v48, vcc, -1, v48, vcc
1930; SDAG-NEXT:    v_addc_u32_e32 v49, vcc, -1, v49, vcc
1931; SDAG-NEXT:    v_or_b32_e32 v55, v39, v49
1932; SDAG-NEXT:    v_or_b32_e32 v54, v38, v48
1933; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[54:55]
1934; SDAG-NEXT:    s_or_b64 s[10:11], vcc, s[10:11]
1935; SDAG-NEXT:    v_or_b32_e32 v10, v20, v10
1936; SDAG-NEXT:    v_mov_b32_e32 v21, v15
1937; SDAG-NEXT:    v_mov_b32_e32 v20, v14
1938; SDAG-NEXT:    s_andn2_b64 exec, exec, s[10:11]
1939; SDAG-NEXT:    s_cbranch_execnz .LBB2_9
1940; SDAG-NEXT:  ; %bb.10: ; %Flow
1941; SDAG-NEXT:    s_or_b64 exec, exec, s[10:11]
1942; SDAG-NEXT:  .LBB2_11: ; %Flow11
1943; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
1944; SDAG-NEXT:    v_lshl_b64 v[12:13], v[12:13], 1
1945; SDAG-NEXT:    v_lshrrev_b32_e32 v20, 31, v11
1946; SDAG-NEXT:    v_lshl_b64 v[10:11], v[10:11], 1
1947; SDAG-NEXT:    v_or_b32_e32 v12, v12, v20
1948; SDAG-NEXT:    v_or_b32_e32 v19, v19, v13
1949; SDAG-NEXT:    v_or_b32_e32 v15, v15, v11
1950; SDAG-NEXT:    v_or_b32_e32 v18, v18, v12
1951; SDAG-NEXT:    v_or_b32_e32 v14, v14, v10
1952; SDAG-NEXT:  .LBB2_12: ; %Flow12
1953; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
1954; SDAG-NEXT:    v_mul_lo_u32 v12, v33, v3
1955; SDAG-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v33, v2, 0
1956; SDAG-NEXT:    v_mul_lo_u32 v24, v27, v2
1957; SDAG-NEXT:    v_mul_lo_u32 v35, v35, v31
1958; SDAG-NEXT:    v_mul_lo_u32 v38, v32, v30
1959; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v31, v33, 0
1960; SDAG-NEXT:    v_mov_b32_e32 v13, 0
1961; SDAG-NEXT:    v_mul_lo_u32 v25, v14, v7
1962; SDAG-NEXT:    v_mad_u64_u32 v[20:21], s[4:5], v14, v6, 0
1963; SDAG-NEXT:    v_mul_lo_u32 v39, v15, v6
1964; SDAG-NEXT:    v_mul_lo_u32 v19, v19, v37
1965; SDAG-NEXT:    v_mul_lo_u32 v48, v18, v36
1966; SDAG-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v37, v14, 0
1967; SDAG-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
1968; SDAG-NEXT:    v_mov_b32_e32 v12, v3
1969; SDAG-NEXT:    v_mad_u64_u32 v[22:23], s[4:5], v30, v33, v[12:13]
1970; SDAG-NEXT:    v_sub_i32_e32 v12, vcc, v16, v2
1971; SDAG-NEXT:    v_add_i32_e64 v16, s[4:5], v21, v25
1972; SDAG-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v24
1973; SDAG-NEXT:    v_mov_b32_e32 v24, v23
1974; SDAG-NEXT:    v_mov_b32_e32 v23, v13
1975; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v31, v27, v[22:23]
1976; SDAG-NEXT:    v_xor_b32_e32 v33, v12, v28
1977; SDAG-NEXT:    v_add_i32_e64 v21, s[4:5], v16, v39
1978; SDAG-NEXT:    v_mov_b32_e32 v12, v7
1979; SDAG-NEXT:    v_mad_u64_u32 v[22:23], s[4:5], v36, v14, v[12:13]
1980; SDAG-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v32, v31, v[10:11]
1981; SDAG-NEXT:    v_add_i32_e64 v24, s[4:5], v24, v3
1982; SDAG-NEXT:    v_addc_u32_e64 v25, s[4:5], 0, 0, s[4:5]
1983; SDAG-NEXT:    v_subb_u32_e32 v7, vcc, v17, v2, vcc
1984; SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v18, v37, v[20:21]
1985; SDAG-NEXT:    v_mov_b32_e32 v14, v23
1986; SDAG-NEXT:    v_mov_b32_e32 v23, v13
1987; SDAG-NEXT:    v_mad_u64_u32 v[12:13], s[4:5], v37, v15, v[22:23]
1988; SDAG-NEXT:    v_add_i32_e64 v11, s[4:5], v35, v11
1989; SDAG-NEXT:    v_mad_u64_u32 v[16:17], s[4:5], v30, v27, v[24:25]
1990; SDAG-NEXT:    v_xor_b32_e32 v7, v7, v29
1991; SDAG-NEXT:    v_add_i32_e64 v3, s[4:5], v19, v3
1992; SDAG-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
1993; SDAG-NEXT:    v_addc_u32_e64 v14, s[4:5], 0, 0, s[4:5]
1994; SDAG-NEXT:    v_mov_b32_e32 v18, v12
1995; SDAG-NEXT:    v_add_i32_e64 v19, s[4:5], v38, v11
1996; SDAG-NEXT:    v_add_i32_e64 v3, s[4:5], v48, v3
1997; SDAG-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v36, v15, v[13:14]
1998; SDAG-NEXT:    v_add_i32_e64 v10, s[4:5], v16, v10
1999; SDAG-NEXT:    v_addc_u32_e64 v13, s[4:5], v17, v19, s[4:5]
2000; SDAG-NEXT:    v_subb_u32_e32 v0, vcc, v0, v10, vcc
2001; SDAG-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v2
2002; SDAG-NEXT:    v_addc_u32_e64 v11, s[4:5], v12, v3, s[4:5]
2003; SDAG-NEXT:    v_subb_u32_e32 v1, vcc, v1, v13, vcc
2004; SDAG-NEXT:    v_xor_b32_e32 v2, v0, v28
2005; SDAG-NEXT:    v_xor_b32_e32 v3, v1, v29
2006; SDAG-NEXT:    v_sub_i32_e32 v0, vcc, v33, v28
2007; SDAG-NEXT:    v_subb_u32_e32 v1, vcc, v7, v29, vcc
2008; SDAG-NEXT:    v_subb_u32_e32 v2, vcc, v2, v28, vcc
2009; SDAG-NEXT:    v_subb_u32_e32 v3, vcc, v3, v29, vcc
2010; SDAG-NEXT:    v_sub_i32_e32 v6, vcc, v8, v6
2011; SDAG-NEXT:    v_subb_u32_e32 v7, vcc, v9, v18, vcc
2012; SDAG-NEXT:    v_xor_b32_e32 v6, v6, v26
2013; SDAG-NEXT:    v_subb_u32_e32 v4, vcc, v4, v10, vcc
2014; SDAG-NEXT:    v_xor_b32_e32 v7, v7, v34
2015; SDAG-NEXT:    v_subb_u32_e32 v5, vcc, v5, v11, vcc
2016; SDAG-NEXT:    v_xor_b32_e32 v8, v4, v26
2017; SDAG-NEXT:    v_xor_b32_e32 v9, v5, v34
2018; SDAG-NEXT:    v_sub_i32_e32 v4, vcc, v6, v26
2019; SDAG-NEXT:    v_subb_u32_e32 v5, vcc, v7, v34, vcc
2020; SDAG-NEXT:    v_subb_u32_e32 v6, vcc, v8, v26, vcc
2021; SDAG-NEXT:    v_subb_u32_e32 v7, vcc, v9, v34, vcc
2022; SDAG-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2023; SDAG-NEXT:    s_waitcnt vmcnt(0)
2024; SDAG-NEXT:    s_setpc_b64 s[30:31]
2025;
2026; GISEL-LABEL: v_srem_v2i128_vv:
2027; GISEL:       ; %bb.0: ; %_udiv-special-cases_udiv-special-cases
2028; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2029; GISEL-NEXT:    v_ashrrev_i32_e32 v28, 31, v3
2030; GISEL-NEXT:    v_ashrrev_i32_e32 v18, 31, v11
2031; GISEL-NEXT:    v_mov_b32_e32 v19, 0x7f
2032; GISEL-NEXT:    v_mov_b32_e32 v20, 0
2033; GISEL-NEXT:    s_mov_b64 s[8:9], 0
2034; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v28
2035; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v28
2036; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v28
2037; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v28
2038; GISEL-NEXT:    v_xor_b32_e32 v8, v8, v18
2039; GISEL-NEXT:    v_xor_b32_e32 v9, v9, v18
2040; GISEL-NEXT:    v_xor_b32_e32 v10, v10, v18
2041; GISEL-NEXT:    v_xor_b32_e32 v11, v11, v18
2042; GISEL-NEXT:    v_sub_i32_e32 v16, vcc, v0, v28
2043; GISEL-NEXT:    v_subb_u32_e32 v17, vcc, v1, v28, vcc
2044; GISEL-NEXT:    v_sub_i32_e64 v30, s[4:5], v8, v18
2045; GISEL-NEXT:    v_subb_u32_e64 v29, s[4:5], v9, v18, s[4:5]
2046; GISEL-NEXT:    v_subb_u32_e32 v8, vcc, v2, v28, vcc
2047; GISEL-NEXT:    v_subb_u32_e32 v9, vcc, v3, v28, vcc
2048; GISEL-NEXT:    v_subb_u32_e64 v10, vcc, v10, v18, s[4:5]
2049; GISEL-NEXT:    v_subb_u32_e32 v11, vcc, v11, v18, vcc
2050; GISEL-NEXT:    v_ffbh_u32_e32 v18, v29
2051; GISEL-NEXT:    v_ffbh_u32_e32 v21, v30
2052; GISEL-NEXT:    v_ffbh_u32_e32 v22, v17
2053; GISEL-NEXT:    v_ffbh_u32_e32 v23, v16
2054; GISEL-NEXT:    v_or_b32_e32 v0, v30, v10
2055; GISEL-NEXT:    v_or_b32_e32 v1, v29, v11
2056; GISEL-NEXT:    v_or_b32_e32 v2, v16, v8
2057; GISEL-NEXT:    v_or_b32_e32 v3, v17, v9
2058; GISEL-NEXT:    v_add_i32_e32 v21, vcc, 32, v21
2059; GISEL-NEXT:    v_add_i32_e32 v23, vcc, 32, v23
2060; GISEL-NEXT:    v_ffbh_u32_e32 v24, v10
2061; GISEL-NEXT:    v_ffbh_u32_e32 v25, v11
2062; GISEL-NEXT:    v_ffbh_u32_e32 v26, v8
2063; GISEL-NEXT:    v_ffbh_u32_e32 v27, v9
2064; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
2065; GISEL-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[2:3]
2066; GISEL-NEXT:    v_min_u32_e32 v0, v18, v21
2067; GISEL-NEXT:    v_min_u32_e32 v1, v22, v23
2068; GISEL-NEXT:    v_add_i32_e64 v2, s[6:7], 32, v24
2069; GISEL-NEXT:    v_add_i32_e64 v3, s[6:7], 32, v26
2070; GISEL-NEXT:    v_min_u32_e32 v2, v25, v2
2071; GISEL-NEXT:    v_min_u32_e32 v3, v27, v3
2072; GISEL-NEXT:    v_add_i32_e64 v0, s[6:7], 64, v0
2073; GISEL-NEXT:    v_add_i32_e64 v1, s[6:7], 64, v1
2074; GISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
2075; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, 1, s[4:5]
2076; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
2077; GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
2078; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[8:9]
2079; GISEL-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
2080; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
2081; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], 0, 0, vcc
2082; GISEL-NEXT:    v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
2083; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
2084; GISEL-NEXT:    v_xor_b32_e32 v18, 0x7f, v2
2085; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[2:3], v[19:20]
2086; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
2087; GISEL-NEXT:    v_or_b32_e32 v18, v18, v0
2088; GISEL-NEXT:    v_or_b32_e32 v19, v3, v1
2089; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[0:1]
2090; GISEL-NEXT:    v_cndmask_b32_e64 v22, 0, 1, vcc
2091; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
2092; GISEL-NEXT:    v_cndmask_b32_e32 v20, v22, v20, vcc
2093; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[18:19]
2094; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
2095; GISEL-NEXT:    v_or_b32_e32 v19, v21, v20
2096; GISEL-NEXT:    v_or_b32_e32 v18, v19, v18
2097; GISEL-NEXT:    v_and_b32_e32 v19, 1, v19
2098; GISEL-NEXT:    v_and_b32_e32 v18, 1, v18
2099; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v19
2100; GISEL-NEXT:    v_cndmask_b32_e64 v31, v16, 0, vcc
2101; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v18
2102; GISEL-NEXT:    v_cndmask_b32_e64 v18, v8, 0, vcc
2103; GISEL-NEXT:    v_cndmask_b32_e64 v19, v9, 0, vcc
2104; GISEL-NEXT:    s_xor_b64 s[4:5], s[4:5], -1
2105; GISEL-NEXT:    v_cndmask_b32_e64 v32, v17, 0, vcc
2106; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
2107; GISEL-NEXT:    s_cbranch_execz .LBB2_6
2108; GISEL-NEXT:  ; %bb.1: ; %udiv-bb15
2109; GISEL-NEXT:    v_add_i32_e32 v31, vcc, 1, v2
2110; GISEL-NEXT:    v_addc_u32_e64 v32, s[4:5], 0, v3, vcc
2111; GISEL-NEXT:    v_sub_i32_e32 v24, vcc, 0x7f, v2
2112; GISEL-NEXT:    v_not_b32_e32 v2, 63
2113; GISEL-NEXT:    v_addc_u32_e64 v33, vcc, 0, v0, s[4:5]
2114; GISEL-NEXT:    v_addc_u32_e32 v34, vcc, 0, v1, vcc
2115; GISEL-NEXT:    v_add_i32_e64 v20, s[4:5], v24, v2
2116; GISEL-NEXT:    v_sub_i32_e64 v18, s[4:5], 64, v24
2117; GISEL-NEXT:    v_lshl_b64 v[0:1], v[16:17], v24
2118; GISEL-NEXT:    v_lshl_b64 v[2:3], v[8:9], v24
2119; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
2120; GISEL-NEXT:    v_lshr_b64 v[18:19], v[16:17], v18
2121; GISEL-NEXT:    v_lshl_b64 v[22:23], v[16:17], v20
2122; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v24
2123; GISEL-NEXT:    v_cndmask_b32_e32 v20, 0, v0, vcc
2124; GISEL-NEXT:    v_cndmask_b32_e32 v21, 0, v1, vcc
2125; GISEL-NEXT:    v_or_b32_e32 v0, v18, v2
2126; GISEL-NEXT:    v_or_b32_e32 v1, v19, v3
2127; GISEL-NEXT:    v_cndmask_b32_e32 v0, v22, v0, vcc
2128; GISEL-NEXT:    v_cndmask_b32_e32 v1, v23, v1, vcc
2129; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v24
2130; GISEL-NEXT:    v_cndmask_b32_e32 v18, v0, v8, vcc
2131; GISEL-NEXT:    v_cndmask_b32_e32 v19, v1, v9, vcc
2132; GISEL-NEXT:    s_mov_b64 s[10:11], s[8:9]
2133; GISEL-NEXT:    v_mov_b32_e32 v0, s8
2134; GISEL-NEXT:    v_mov_b32_e32 v1, s9
2135; GISEL-NEXT:    v_mov_b32_e32 v2, s10
2136; GISEL-NEXT:    v_mov_b32_e32 v3, s11
2137; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
2138; GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[6:7]
2139; GISEL-NEXT:    s_cbranch_execz .LBB2_5
2140; GISEL-NEXT:  ; %bb.2: ; %udiv-preheader4
2141; GISEL-NEXT:    v_add_i32_e32 v24, vcc, 0xffffffc0, v31
2142; GISEL-NEXT:    v_sub_i32_e32 v22, vcc, 64, v31
2143; GISEL-NEXT:    v_lshr_b64 v[0:1], v[8:9], v31
2144; GISEL-NEXT:    v_lshr_b64 v[2:3], v[16:17], v31
2145; GISEL-NEXT:    s_mov_b64 s[4:5], 0
2146; GISEL-NEXT:    v_add_i32_e32 v35, vcc, -1, v30
2147; GISEL-NEXT:    v_addc_u32_e32 v36, vcc, -1, v29, vcc
2148; GISEL-NEXT:    v_lshl_b64 v[22:23], v[8:9], v22
2149; GISEL-NEXT:    v_lshr_b64 v[24:25], v[8:9], v24
2150; GISEL-NEXT:    v_addc_u32_e32 v37, vcc, -1, v10, vcc
2151; GISEL-NEXT:    v_addc_u32_e32 v38, vcc, -1, v11, vcc
2152; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
2153; GISEL-NEXT:    v_or_b32_e32 v2, v2, v22
2154; GISEL-NEXT:    v_or_b32_e32 v3, v3, v23
2155; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v31
2156; GISEL-NEXT:    v_cndmask_b32_e32 v2, v24, v2, vcc
2157; GISEL-NEXT:    v_cndmask_b32_e32 v3, v25, v3, vcc
2158; GISEL-NEXT:    v_cndmask_b32_e32 v24, 0, v0, vcc
2159; GISEL-NEXT:    v_cndmask_b32_e32 v25, 0, v1, vcc
2160; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v31
2161; GISEL-NEXT:    v_cndmask_b32_e32 v26, v2, v16, vcc
2162; GISEL-NEXT:    v_cndmask_b32_e32 v27, v3, v17, vcc
2163; GISEL-NEXT:    v_mov_b32_e32 v23, 0
2164; GISEL-NEXT:    v_mov_b32_e32 v0, s4
2165; GISEL-NEXT:    v_mov_b32_e32 v1, s5
2166; GISEL-NEXT:    v_mov_b32_e32 v2, s6
2167; GISEL-NEXT:    v_mov_b32_e32 v3, s7
2168; GISEL-NEXT:  .LBB2_3: ; %udiv-do-while3
2169; GISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
2170; GISEL-NEXT:    v_lshrrev_b32_e32 v39, 31, v21
2171; GISEL-NEXT:    v_lshl_b64 v[2:3], v[20:21], 1
2172; GISEL-NEXT:    v_lshrrev_b32_e32 v22, 31, v27
2173; GISEL-NEXT:    v_lshl_b64 v[26:27], v[26:27], 1
2174; GISEL-NEXT:    v_lshl_b64 v[24:25], v[24:25], 1
2175; GISEL-NEXT:    v_lshrrev_b32_e32 v48, 31, v19
2176; GISEL-NEXT:    v_add_i32_e32 v31, vcc, -1, v31
2177; GISEL-NEXT:    v_addc_u32_e32 v32, vcc, -1, v32, vcc
2178; GISEL-NEXT:    v_lshl_b64 v[18:19], v[18:19], 1
2179; GISEL-NEXT:    v_or_b32_e32 v20, v0, v2
2180; GISEL-NEXT:    v_or_b32_e32 v21, v1, v3
2181; GISEL-NEXT:    v_or_b32_e32 v2, v24, v22
2182; GISEL-NEXT:    v_or_b32_e32 v3, v26, v48
2183; GISEL-NEXT:    v_addc_u32_e32 v33, vcc, -1, v33, vcc
2184; GISEL-NEXT:    v_addc_u32_e32 v34, vcc, -1, v34, vcc
2185; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v35, v3
2186; GISEL-NEXT:    v_subb_u32_e32 v0, vcc, v36, v27, vcc
2187; GISEL-NEXT:    v_or_b32_e32 v0, v31, v33
2188; GISEL-NEXT:    v_or_b32_e32 v1, v32, v34
2189; GISEL-NEXT:    v_subb_u32_e32 v22, vcc, v37, v2, vcc
2190; GISEL-NEXT:    v_subb_u32_e32 v22, vcc, v38, v25, vcc
2191; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
2192; GISEL-NEXT:    v_ashrrev_i32_e32 v0, 31, v22
2193; GISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
2194; GISEL-NEXT:    v_and_b32_e32 v1, v0, v30
2195; GISEL-NEXT:    v_and_b32_e32 v24, v0, v29
2196; GISEL-NEXT:    v_and_b32_e32 v48, v0, v10
2197; GISEL-NEXT:    v_and_b32_e32 v49, v0, v11
2198; GISEL-NEXT:    v_and_b32_e32 v22, 1, v0
2199; GISEL-NEXT:    v_sub_i32_e32 v26, vcc, v3, v1
2200; GISEL-NEXT:    v_subb_u32_e32 v27, vcc, v27, v24, vcc
2201; GISEL-NEXT:    v_subb_u32_e32 v24, vcc, v2, v48, vcc
2202; GISEL-NEXT:    v_subb_u32_e32 v25, vcc, v25, v49, vcc
2203; GISEL-NEXT:    v_or_b32_e32 v18, v18, v39
2204; GISEL-NEXT:    v_mov_b32_e32 v0, v22
2205; GISEL-NEXT:    v_mov_b32_e32 v1, v23
2206; GISEL-NEXT:    s_andn2_b64 exec, exec, s[4:5]
2207; GISEL-NEXT:    s_cbranch_execnz .LBB2_3
2208; GISEL-NEXT:  ; %bb.4: ; %Flow13
2209; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
2210; GISEL-NEXT:  .LBB2_5: ; %Flow14
2211; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
2212; GISEL-NEXT:    v_lshl_b64 v[2:3], v[20:21], 1
2213; GISEL-NEXT:    v_lshl_b64 v[18:19], v[18:19], 1
2214; GISEL-NEXT:    v_lshrrev_b32_e32 v20, 31, v21
2215; GISEL-NEXT:    v_or_b32_e32 v18, v18, v20
2216; GISEL-NEXT:    v_or_b32_e32 v31, v0, v2
2217; GISEL-NEXT:    v_or_b32_e32 v32, v1, v3
2218; GISEL-NEXT:  .LBB2_6: ; %Flow16
2219; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
2220; GISEL-NEXT:    s_mov_b64 s[8:9], 0
2221; GISEL-NEXT:    v_ashrrev_i32_e32 v33, 31, v7
2222; GISEL-NEXT:    v_ashrrev_i32_e32 v0, 31, v15
2223; GISEL-NEXT:    v_mov_b32_e32 v2, 0x7f
2224; GISEL-NEXT:    v_mov_b32_e32 v3, 0
2225; GISEL-NEXT:    v_xor_b32_e32 v1, v4, v33
2226; GISEL-NEXT:    v_xor_b32_e32 v4, v5, v33
2227; GISEL-NEXT:    v_xor_b32_e32 v5, v6, v33
2228; GISEL-NEXT:    v_xor_b32_e32 v7, v7, v33
2229; GISEL-NEXT:    v_xor_b32_e32 v6, v12, v0
2230; GISEL-NEXT:    v_xor_b32_e32 v20, v13, v0
2231; GISEL-NEXT:    v_xor_b32_e32 v14, v14, v0
2232; GISEL-NEXT:    v_xor_b32_e32 v15, v15, v0
2233; GISEL-NEXT:    v_sub_i32_e32 v12, vcc, v1, v33
2234; GISEL-NEXT:    v_subb_u32_e32 v13, vcc, v4, v33, vcc
2235; GISEL-NEXT:    v_sub_i32_e64 v35, s[4:5], v6, v0
2236; GISEL-NEXT:    v_subb_u32_e64 v34, s[4:5], v20, v0, s[4:5]
2237; GISEL-NEXT:    v_subb_u32_e32 v6, vcc, v5, v33, vcc
2238; GISEL-NEXT:    v_subb_u32_e32 v7, vcc, v7, v33, vcc
2239; GISEL-NEXT:    v_subb_u32_e64 v4, vcc, v14, v0, s[4:5]
2240; GISEL-NEXT:    v_subb_u32_e32 v5, vcc, v15, v0, vcc
2241; GISEL-NEXT:    v_ffbh_u32_e32 v20, v34
2242; GISEL-NEXT:    v_ffbh_u32_e32 v21, v35
2243; GISEL-NEXT:    v_ffbh_u32_e32 v22, v13
2244; GISEL-NEXT:    v_ffbh_u32_e32 v23, v12
2245; GISEL-NEXT:    v_or_b32_e32 v0, v35, v4
2246; GISEL-NEXT:    v_or_b32_e32 v1, v34, v5
2247; GISEL-NEXT:    v_or_b32_e32 v14, v12, v6
2248; GISEL-NEXT:    v_or_b32_e32 v15, v13, v7
2249; GISEL-NEXT:    v_add_i32_e32 v21, vcc, 32, v21
2250; GISEL-NEXT:    v_ffbh_u32_e32 v24, v5
2251; GISEL-NEXT:    v_ffbh_u32_e32 v25, v4
2252; GISEL-NEXT:    v_add_i32_e32 v23, vcc, 32, v23
2253; GISEL-NEXT:    v_ffbh_u32_e32 v26, v7
2254; GISEL-NEXT:    v_ffbh_u32_e32 v27, v6
2255; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
2256; GISEL-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[14:15]
2257; GISEL-NEXT:    v_min_u32_e32 v0, v20, v21
2258; GISEL-NEXT:    v_add_i32_e64 v1, s[6:7], 32, v25
2259; GISEL-NEXT:    v_min_u32_e32 v14, v22, v23
2260; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], 32, v27
2261; GISEL-NEXT:    v_add_i32_e64 v0, s[6:7], 64, v0
2262; GISEL-NEXT:    v_min_u32_e32 v1, v24, v1
2263; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], 64, v14
2264; GISEL-NEXT:    v_min_u32_e32 v15, v26, v15
2265; GISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
2266; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s[4:5]
2267; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[4:5]
2268; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
2269; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[6:7]
2270; GISEL-NEXT:    v_cndmask_b32_e32 v1, v15, v14, vcc
2271; GISEL-NEXT:    v_sub_i32_e32 v14, vcc, v0, v1
2272; GISEL-NEXT:    v_subb_u32_e64 v15, s[4:5], 0, 0, vcc
2273; GISEL-NEXT:    v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
2274; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
2275; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[14:15], v[2:3]
2276; GISEL-NEXT:    v_cndmask_b32_e64 v21, 0, 1, vcc
2277; GISEL-NEXT:    v_xor_b32_e32 v2, 0x7f, v14
2278; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[0:1]
2279; GISEL-NEXT:    v_cndmask_b32_e64 v22, 0, 1, vcc
2280; GISEL-NEXT:    v_or_b32_e32 v2, v2, v0
2281; GISEL-NEXT:    v_or_b32_e32 v3, v15, v1
2282; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
2283; GISEL-NEXT:    v_cndmask_b32_e32 v21, v22, v21, vcc
2284; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
2285; GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
2286; GISEL-NEXT:    v_or_b32_e32 v3, v20, v21
2287; GISEL-NEXT:    v_and_b32_e32 v20, 1, v3
2288; GISEL-NEXT:    v_or_b32_e32 v2, v3, v2
2289; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v20
2290; GISEL-NEXT:    v_cndmask_b32_e64 v20, v12, 0, vcc
2291; GISEL-NEXT:    v_and_b32_e32 v22, 1, v2
2292; GISEL-NEXT:    v_cndmask_b32_e64 v21, v13, 0, vcc
2293; GISEL-NEXT:    v_cndmask_b32_e64 v2, v6, 0, vcc
2294; GISEL-NEXT:    v_cndmask_b32_e64 v3, v7, 0, vcc
2295; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v22
2296; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
2297; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
2298; GISEL-NEXT:    s_cbranch_execz .LBB2_12
2299; GISEL-NEXT:  ; %bb.7: ; %udiv-bb1
2300; GISEL-NEXT:    v_add_i32_e32 v36, vcc, 1, v14
2301; GISEL-NEXT:    v_addc_u32_e64 v37, s[4:5], 0, v15, vcc
2302; GISEL-NEXT:    v_sub_i32_e32 v24, vcc, 0x7f, v14
2303; GISEL-NEXT:    v_not_b32_e32 v2, 63
2304; GISEL-NEXT:    v_addc_u32_e64 v38, vcc, 0, v0, s[4:5]
2305; GISEL-NEXT:    v_addc_u32_e32 v39, vcc, 0, v1, vcc
2306; GISEL-NEXT:    v_add_i32_e64 v20, s[4:5], v24, v2
2307; GISEL-NEXT:    v_sub_i32_e64 v14, s[4:5], 64, v24
2308; GISEL-NEXT:    v_lshl_b64 v[0:1], v[12:13], v24
2309; GISEL-NEXT:    v_lshl_b64 v[2:3], v[6:7], v24
2310; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
2311; GISEL-NEXT:    v_lshr_b64 v[14:15], v[12:13], v14
2312; GISEL-NEXT:    v_lshl_b64 v[22:23], v[12:13], v20
2313; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v24
2314; GISEL-NEXT:    v_cndmask_b32_e32 v20, 0, v0, vcc
2315; GISEL-NEXT:    v_cndmask_b32_e32 v21, 0, v1, vcc
2316; GISEL-NEXT:    v_or_b32_e32 v0, v14, v2
2317; GISEL-NEXT:    v_or_b32_e32 v1, v15, v3
2318; GISEL-NEXT:    v_cndmask_b32_e32 v0, v22, v0, vcc
2319; GISEL-NEXT:    v_cndmask_b32_e32 v1, v23, v1, vcc
2320; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v24
2321; GISEL-NEXT:    v_cndmask_b32_e32 v14, v0, v6, vcc
2322; GISEL-NEXT:    v_cndmask_b32_e32 v15, v1, v7, vcc
2323; GISEL-NEXT:    s_mov_b64 s[10:11], s[8:9]
2324; GISEL-NEXT:    v_mov_b32_e32 v0, s8
2325; GISEL-NEXT:    v_mov_b32_e32 v1, s9
2326; GISEL-NEXT:    v_mov_b32_e32 v2, s10
2327; GISEL-NEXT:    v_mov_b32_e32 v3, s11
2328; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
2329; GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[6:7]
2330; GISEL-NEXT:    s_cbranch_execz .LBB2_11
2331; GISEL-NEXT:  ; %bb.8: ; %udiv-preheader
2332; GISEL-NEXT:    v_add_i32_e32 v24, vcc, 0xffffffc0, v36
2333; GISEL-NEXT:    v_sub_i32_e32 v22, vcc, 64, v36
2334; GISEL-NEXT:    v_lshr_b64 v[0:1], v[6:7], v36
2335; GISEL-NEXT:    v_lshr_b64 v[2:3], v[12:13], v36
2336; GISEL-NEXT:    s_mov_b64 s[4:5], 0
2337; GISEL-NEXT:    v_add_i32_e32 v48, vcc, -1, v35
2338; GISEL-NEXT:    v_addc_u32_e32 v49, vcc, -1, v34, vcc
2339; GISEL-NEXT:    v_lshl_b64 v[22:23], v[6:7], v22
2340; GISEL-NEXT:    v_lshr_b64 v[24:25], v[6:7], v24
2341; GISEL-NEXT:    v_addc_u32_e32 v50, vcc, -1, v4, vcc
2342; GISEL-NEXT:    v_addc_u32_e32 v51, vcc, -1, v5, vcc
2343; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
2344; GISEL-NEXT:    v_or_b32_e32 v2, v2, v22
2345; GISEL-NEXT:    v_or_b32_e32 v3, v3, v23
2346; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v36
2347; GISEL-NEXT:    v_cndmask_b32_e32 v2, v24, v2, vcc
2348; GISEL-NEXT:    v_cndmask_b32_e32 v3, v25, v3, vcc
2349; GISEL-NEXT:    v_cndmask_b32_e32 v26, 0, v0, vcc
2350; GISEL-NEXT:    v_cndmask_b32_e32 v27, 0, v1, vcc
2351; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v36
2352; GISEL-NEXT:    v_cndmask_b32_e32 v24, v2, v12, vcc
2353; GISEL-NEXT:    v_cndmask_b32_e32 v25, v3, v13, vcc
2354; GISEL-NEXT:    v_mov_b32_e32 v23, 0
2355; GISEL-NEXT:    v_mov_b32_e32 v0, s4
2356; GISEL-NEXT:    v_mov_b32_e32 v1, s5
2357; GISEL-NEXT:    v_mov_b32_e32 v2, s6
2358; GISEL-NEXT:    v_mov_b32_e32 v3, s7
2359; GISEL-NEXT:  .LBB2_9: ; %udiv-do-while
2360; GISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
2361; GISEL-NEXT:    v_lshl_b64 v[2:3], v[20:21], 1
2362; GISEL-NEXT:    v_lshrrev_b32_e32 v22, 31, v21
2363; GISEL-NEXT:    v_lshl_b64 v[52:53], v[24:25], 1
2364; GISEL-NEXT:    v_lshl_b64 v[26:27], v[26:27], 1
2365; GISEL-NEXT:    v_lshrrev_b32_e32 v24, 31, v25
2366; GISEL-NEXT:    v_lshrrev_b32_e32 v25, 31, v15
2367; GISEL-NEXT:    v_lshl_b64 v[14:15], v[14:15], 1
2368; GISEL-NEXT:    v_add_i32_e32 v36, vcc, -1, v36
2369; GISEL-NEXT:    v_addc_u32_e32 v37, vcc, -1, v37, vcc
2370; GISEL-NEXT:    v_or_b32_e32 v20, v0, v2
2371; GISEL-NEXT:    v_or_b32_e32 v21, v1, v3
2372; GISEL-NEXT:    v_or_b32_e32 v2, v26, v24
2373; GISEL-NEXT:    v_or_b32_e32 v3, v52, v25
2374; GISEL-NEXT:    v_or_b32_e32 v14, v14, v22
2375; GISEL-NEXT:    v_addc_u32_e32 v38, vcc, -1, v38, vcc
2376; GISEL-NEXT:    v_addc_u32_e32 v39, vcc, -1, v39, vcc
2377; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v48, v3
2378; GISEL-NEXT:    v_subb_u32_e32 v0, vcc, v49, v53, vcc
2379; GISEL-NEXT:    v_or_b32_e32 v0, v36, v38
2380; GISEL-NEXT:    v_or_b32_e32 v1, v37, v39
2381; GISEL-NEXT:    v_subb_u32_e32 v22, vcc, v50, v2, vcc
2382; GISEL-NEXT:    v_subb_u32_e32 v22, vcc, v51, v27, vcc
2383; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
2384; GISEL-NEXT:    v_ashrrev_i32_e32 v0, 31, v22
2385; GISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
2386; GISEL-NEXT:    v_and_b32_e32 v22, 1, v0
2387; GISEL-NEXT:    v_and_b32_e32 v1, v0, v35
2388; GISEL-NEXT:    v_and_b32_e32 v25, v0, v34
2389; GISEL-NEXT:    v_and_b32_e32 v26, v0, v4
2390; GISEL-NEXT:    v_and_b32_e32 v52, v0, v5
2391; GISEL-NEXT:    v_sub_i32_e32 v24, vcc, v3, v1
2392; GISEL-NEXT:    v_subb_u32_e32 v25, vcc, v53, v25, vcc
2393; GISEL-NEXT:    v_mov_b32_e32 v0, v22
2394; GISEL-NEXT:    v_mov_b32_e32 v1, v23
2395; GISEL-NEXT:    v_subb_u32_e32 v26, vcc, v2, v26, vcc
2396; GISEL-NEXT:    v_subb_u32_e32 v27, vcc, v27, v52, vcc
2397; GISEL-NEXT:    s_andn2_b64 exec, exec, s[4:5]
2398; GISEL-NEXT:    s_cbranch_execnz .LBB2_9
2399; GISEL-NEXT:  ; %bb.10: ; %Flow
2400; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
2401; GISEL-NEXT:  .LBB2_11: ; %Flow11
2402; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
2403; GISEL-NEXT:    v_lshl_b64 v[22:23], v[20:21], 1
2404; GISEL-NEXT:    v_lshl_b64 v[2:3], v[14:15], 1
2405; GISEL-NEXT:    v_lshrrev_b32_e32 v14, 31, v21
2406; GISEL-NEXT:    v_or_b32_e32 v2, v2, v14
2407; GISEL-NEXT:    v_or_b32_e32 v20, v0, v22
2408; GISEL-NEXT:    v_or_b32_e32 v21, v1, v23
2409; GISEL-NEXT:  .LBB2_12: ; %Flow12
2410; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
2411; GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v30, v31, 0
2412; GISEL-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], v30, v18, 0
2413; GISEL-NEXT:    v_mul_lo_u32 v24, v30, v19
2414; GISEL-NEXT:    v_mul_lo_u32 v25, v29, v18
2415; GISEL-NEXT:    v_mad_u64_u32 v[18:19], s[4:5], v35, v20, 0
2416; GISEL-NEXT:    v_mad_u64_u32 v[22:23], s[4:5], v35, v2, 0
2417; GISEL-NEXT:    v_mul_lo_u32 v26, v35, v3
2418; GISEL-NEXT:    v_mul_lo_u32 v27, v34, v2
2419; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v29, v32, v[14:15]
2420; GISEL-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], v34, v21, v[22:23]
2421; GISEL-NEXT:    v_mov_b32_e32 v22, v19
2422; GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v10, v31, v[2:3]
2423; GISEL-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], v4, v20, v[14:15]
2424; GISEL-NEXT:    v_mad_u64_u32 v[1:2], vcc, v30, v32, v[1:2]
2425; GISEL-NEXT:    v_mov_b32_e32 v23, v14
2426; GISEL-NEXT:    v_mad_u64_u32 v[22:23], s[4:5], v35, v21, v[22:23]
2427; GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[6:7], v29, v31, v[1:2]
2428; GISEL-NEXT:    v_addc_u32_e64 v3, s[6:7], v3, v24, s[6:7]
2429; GISEL-NEXT:    v_mad_u64_u32 v[22:23], s[6:7], v34, v20, v[22:23]
2430; GISEL-NEXT:    v_addc_u32_e64 v14, s[6:7], v15, v26, s[6:7]
2431; GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v3, v25, vcc
2432; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v16, v0
2433; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v17, v1, vcc
2434; GISEL-NEXT:    v_xor_b32_e32 v15, v0, v28
2435; GISEL-NEXT:    v_addc_u32_e64 v0, s[4:5], v14, v27, s[4:5]
2436; GISEL-NEXT:    v_sub_i32_e64 v12, s[4:5], v12, v18
2437; GISEL-NEXT:    v_subb_u32_e64 v14, s[4:5], v13, v22, s[4:5]
2438; GISEL-NEXT:    v_xor_b32_e32 v16, v12, v33
2439; GISEL-NEXT:    v_mad_u64_u32 v[12:13], s[6:7], v10, v32, v[3:4]
2440; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v28
2441; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[6:7], v4, v21, v[0:1]
2442; GISEL-NEXT:    v_xor_b32_e32 v14, v14, v33
2443; GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[6:7], v11, v31, v[12:13]
2444; GISEL-NEXT:    v_sub_i32_e64 v0, s[6:7], v15, v28
2445; GISEL-NEXT:    v_subb_u32_e64 v1, s[6:7], v1, v28, s[6:7]
2446; GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[8:9], v5, v20, v[3:4]
2447; GISEL-NEXT:    v_sub_i32_e64 v4, s[8:9], v16, v33
2448; GISEL-NEXT:    v_subb_u32_e64 v5, s[8:9], v14, v33, s[8:9]
2449; GISEL-NEXT:    v_subb_u32_e32 v2, vcc, v8, v2, vcc
2450; GISEL-NEXT:    v_subb_u32_e32 v8, vcc, v9, v10, vcc
2451; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v28
2452; GISEL-NEXT:    v_subb_u32_e64 v6, vcc, v6, v23, s[4:5]
2453; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v7, v3, vcc
2454; GISEL-NEXT:    v_xor_b32_e32 v6, v6, v33
2455; GISEL-NEXT:    v_xor_b32_e32 v7, v8, v28
2456; GISEL-NEXT:    v_xor_b32_e32 v8, v3, v33
2457; GISEL-NEXT:    v_subb_u32_e64 v2, vcc, v2, v28, s[6:7]
2458; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v7, v28, vcc
2459; GISEL-NEXT:    v_subb_u32_e64 v6, vcc, v6, v33, s[8:9]
2460; GISEL-NEXT:    v_subb_u32_e32 v7, vcc, v8, v33, vcc
2461; GISEL-NEXT:    s_setpc_b64 s[30:31]
2462  %shl = srem <2 x i128> %lhs, %rhs
2463  ret <2 x i128> %shl
2464}
2465
2466define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
2467; SDAG-LABEL: v_urem_v2i128_vv:
2468; SDAG:       ; %bb.0: ; %_udiv-special-cases_udiv-special-cases
2469; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2470; SDAG-NEXT:    v_or_b32_e32 v17, v9, v11
2471; SDAG-NEXT:    v_or_b32_e32 v16, v8, v10
2472; SDAG-NEXT:    v_or_b32_e32 v19, v1, v3
2473; SDAG-NEXT:    v_or_b32_e32 v18, v0, v2
2474; SDAG-NEXT:    v_ffbh_u32_e32 v20, v10
2475; SDAG-NEXT:    v_ffbh_u32_e32 v21, v11
2476; SDAG-NEXT:    v_ffbh_u32_e32 v22, v8
2477; SDAG-NEXT:    v_ffbh_u32_e32 v23, v9
2478; SDAG-NEXT:    v_ffbh_u32_e32 v24, v2
2479; SDAG-NEXT:    v_ffbh_u32_e32 v25, v3
2480; SDAG-NEXT:    v_ffbh_u32_e32 v26, v0
2481; SDAG-NEXT:    v_ffbh_u32_e32 v27, v1
2482; SDAG-NEXT:    v_mov_b32_e32 v28, 0
2483; SDAG-NEXT:    s_mov_b64 s[8:9], 0x7f
2484; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[16:17]
2485; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[18:19]
2486; SDAG-NEXT:    v_add_i32_e64 v16, s[6:7], 32, v20
2487; SDAG-NEXT:    v_add_i32_e64 v17, s[6:7], 32, v22
2488; SDAG-NEXT:    v_add_i32_e64 v18, s[6:7], 32, v24
2489; SDAG-NEXT:    v_add_i32_e64 v19, s[6:7], 32, v26
2490; SDAG-NEXT:    v_min_u32_e32 v16, v16, v21
2491; SDAG-NEXT:    v_min_u32_e32 v17, v17, v23
2492; SDAG-NEXT:    v_min_u32_e32 v18, v18, v25
2493; SDAG-NEXT:    v_min_u32_e32 v19, v19, v27
2494; SDAG-NEXT:    s_or_b64 s[6:7], vcc, s[4:5]
2495; SDAG-NEXT:    v_add_i32_e32 v17, vcc, 64, v17
2496; SDAG-NEXT:    v_addc_u32_e64 v20, s[4:5], 0, 0, vcc
2497; SDAG-NEXT:    v_add_i32_e32 v19, vcc, 64, v19
2498; SDAG-NEXT:    v_addc_u32_e64 v21, s[4:5], 0, 0, vcc
2499; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
2500; SDAG-NEXT:    v_cndmask_b32_e64 v20, v20, 0, vcc
2501; SDAG-NEXT:    v_cndmask_b32_e32 v16, v17, v16, vcc
2502; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
2503; SDAG-NEXT:    v_cndmask_b32_e64 v17, v21, 0, vcc
2504; SDAG-NEXT:    v_cndmask_b32_e32 v18, v19, v18, vcc
2505; SDAG-NEXT:    v_sub_i32_e32 v18, vcc, v16, v18
2506; SDAG-NEXT:    v_subb_u32_e32 v19, vcc, v20, v17, vcc
2507; SDAG-NEXT:    v_xor_b32_e32 v16, 0x7f, v18
2508; SDAG-NEXT:    v_subbrev_u32_e32 v20, vcc, 0, v28, vcc
2509; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[8:9], v[18:19]
2510; SDAG-NEXT:    v_cndmask_b32_e64 v22, 0, 1, s[4:5]
2511; SDAG-NEXT:    v_subbrev_u32_e32 v21, vcc, 0, v28, vcc
2512; SDAG-NEXT:    v_or_b32_e32 v16, v16, v20
2513; SDAG-NEXT:    v_or_b32_e32 v17, v19, v21
2514; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[20:21]
2515; SDAG-NEXT:    v_cndmask_b32_e64 v23, 0, 1, vcc
2516; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[16:17]
2517; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[20:21]
2518; SDAG-NEXT:    v_cndmask_b32_e64 v16, v23, v22, s[4:5]
2519; SDAG-NEXT:    v_and_b32_e32 v16, 1, v16
2520; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 1, v16
2521; SDAG-NEXT:    s_or_b64 s[4:5], s[6:7], s[4:5]
2522; SDAG-NEXT:    v_cndmask_b32_e64 v33, v3, 0, s[4:5]
2523; SDAG-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
2524; SDAG-NEXT:    v_cndmask_b32_e64 v31, v2, 0, s[4:5]
2525; SDAG-NEXT:    v_cndmask_b32_e64 v30, v1, 0, s[4:5]
2526; SDAG-NEXT:    s_and_b64 s[8:9], s[6:7], vcc
2527; SDAG-NEXT:    v_cndmask_b32_e64 v32, v0, 0, s[4:5]
2528; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[8:9]
2529; SDAG-NEXT:    s_cbranch_execz .LBB3_6
2530; SDAG-NEXT:  ; %bb.1: ; %udiv-bb15
2531; SDAG-NEXT:    v_add_i32_e32 v30, vcc, 1, v18
2532; SDAG-NEXT:    v_sub_i32_e64 v22, s[4:5], 63, v18
2533; SDAG-NEXT:    v_mov_b32_e32 v16, 0
2534; SDAG-NEXT:    v_mov_b32_e32 v17, 0
2535; SDAG-NEXT:    v_addc_u32_e32 v31, vcc, 0, v19, vcc
2536; SDAG-NEXT:    v_lshl_b64 v[22:23], v[0:1], v22
2537; SDAG-NEXT:    v_addc_u32_e32 v32, vcc, 0, v20, vcc
2538; SDAG-NEXT:    v_addc_u32_e32 v33, vcc, 0, v21, vcc
2539; SDAG-NEXT:    v_or_b32_e32 v19, v30, v32
2540; SDAG-NEXT:    v_sub_i32_e32 v21, vcc, 0x7f, v18
2541; SDAG-NEXT:    v_or_b32_e32 v20, v31, v33
2542; SDAG-NEXT:    v_lshl_b64 v[24:25], v[2:3], v21
2543; SDAG-NEXT:    v_sub_i32_e32 v18, vcc, 64, v21
2544; SDAG-NEXT:    v_lshl_b64 v[26:27], v[0:1], v21
2545; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[19:20]
2546; SDAG-NEXT:    v_lshr_b64 v[18:19], v[0:1], v18
2547; SDAG-NEXT:    v_or_b32_e32 v19, v25, v19
2548; SDAG-NEXT:    v_or_b32_e32 v18, v24, v18
2549; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v21
2550; SDAG-NEXT:    v_cndmask_b32_e64 v19, v23, v19, s[4:5]
2551; SDAG-NEXT:    v_cndmask_b32_e64 v18, v22, v18, s[4:5]
2552; SDAG-NEXT:    v_cndmask_b32_e64 v23, 0, v27, s[4:5]
2553; SDAG-NEXT:    v_cndmask_b32_e64 v22, 0, v26, s[4:5]
2554; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v21
2555; SDAG-NEXT:    v_cndmask_b32_e64 v19, v19, v3, s[4:5]
2556; SDAG-NEXT:    v_cndmask_b32_e64 v18, v18, v2, s[4:5]
2557; SDAG-NEXT:    v_mov_b32_e32 v20, 0
2558; SDAG-NEXT:    v_mov_b32_e32 v21, 0
2559; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
2560; SDAG-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
2561; SDAG-NEXT:    s_cbranch_execz .LBB3_5
2562; SDAG-NEXT:  ; %bb.2: ; %udiv-preheader4
2563; SDAG-NEXT:    v_lshr_b64 v[16:17], v[0:1], v30
2564; SDAG-NEXT:    v_sub_i32_e32 v28, vcc, 64, v30
2565; SDAG-NEXT:    v_subrev_i32_e32 v35, vcc, 64, v30
2566; SDAG-NEXT:    v_lshr_b64 v[26:27], v[2:3], v30
2567; SDAG-NEXT:    v_add_i32_e32 v34, vcc, -1, v8
2568; SDAG-NEXT:    s_mov_b64 s[10:11], 0
2569; SDAG-NEXT:    v_mov_b32_e32 v24, 0
2570; SDAG-NEXT:    v_mov_b32_e32 v25, 0
2571; SDAG-NEXT:    v_mov_b32_e32 v20, 0
2572; SDAG-NEXT:    v_mov_b32_e32 v21, 0
2573; SDAG-NEXT:    v_lshl_b64 v[28:29], v[2:3], v28
2574; SDAG-NEXT:    v_lshr_b64 v[37:38], v[2:3], v35
2575; SDAG-NEXT:    v_addc_u32_e32 v35, vcc, -1, v9, vcc
2576; SDAG-NEXT:    v_or_b32_e32 v17, v17, v29
2577; SDAG-NEXT:    v_or_b32_e32 v16, v16, v28
2578; SDAG-NEXT:    v_addc_u32_e32 v36, vcc, -1, v10, vcc
2579; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v30
2580; SDAG-NEXT:    v_cndmask_b32_e64 v17, v38, v17, s[4:5]
2581; SDAG-NEXT:    v_cndmask_b32_e64 v16, v37, v16, s[4:5]
2582; SDAG-NEXT:    v_cndmask_b32_e64 v29, 0, v27, s[4:5]
2583; SDAG-NEXT:    v_cndmask_b32_e64 v28, 0, v26, s[4:5]
2584; SDAG-NEXT:    v_addc_u32_e32 v37, vcc, -1, v11, vcc
2585; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v30
2586; SDAG-NEXT:    v_cndmask_b32_e32 v27, v17, v1, vcc
2587; SDAG-NEXT:    v_cndmask_b32_e32 v26, v16, v0, vcc
2588; SDAG-NEXT:    v_mov_b32_e32 v17, 0
2589; SDAG-NEXT:  .LBB3_3: ; %udiv-do-while3
2590; SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
2591; SDAG-NEXT:    v_lshrrev_b32_e32 v16, 31, v23
2592; SDAG-NEXT:    v_lshl_b64 v[22:23], v[22:23], 1
2593; SDAG-NEXT:    v_lshl_b64 v[28:29], v[28:29], 1
2594; SDAG-NEXT:    v_lshrrev_b32_e32 v38, 31, v27
2595; SDAG-NEXT:    v_lshl_b64 v[26:27], v[26:27], 1
2596; SDAG-NEXT:    v_lshrrev_b32_e32 v39, 31, v19
2597; SDAG-NEXT:    v_lshl_b64 v[18:19], v[18:19], 1
2598; SDAG-NEXT:    v_or_b32_e32 v23, v25, v23
2599; SDAG-NEXT:    v_or_b32_e32 v22, v24, v22
2600; SDAG-NEXT:    v_or_b32_e32 v24, v28, v38
2601; SDAG-NEXT:    v_or_b32_e32 v25, v26, v39
2602; SDAG-NEXT:    v_or_b32_e32 v18, v18, v16
2603; SDAG-NEXT:    v_sub_i32_e32 v16, vcc, v34, v25
2604; SDAG-NEXT:    v_subb_u32_e32 v16, vcc, v35, v27, vcc
2605; SDAG-NEXT:    v_subb_u32_e32 v16, vcc, v36, v24, vcc
2606; SDAG-NEXT:    v_subb_u32_e32 v16, vcc, v37, v29, vcc
2607; SDAG-NEXT:    v_ashrrev_i32_e32 v16, 31, v16
2608; SDAG-NEXT:    v_and_b32_e32 v26, v16, v8
2609; SDAG-NEXT:    v_and_b32_e32 v28, v16, v9
2610; SDAG-NEXT:    v_and_b32_e32 v38, v16, v10
2611; SDAG-NEXT:    v_and_b32_e32 v39, v16, v11
2612; SDAG-NEXT:    v_and_b32_e32 v16, 1, v16
2613; SDAG-NEXT:    v_sub_i32_e32 v26, vcc, v25, v26
2614; SDAG-NEXT:    v_subb_u32_e32 v27, vcc, v27, v28, vcc
2615; SDAG-NEXT:    v_subb_u32_e32 v28, vcc, v24, v38, vcc
2616; SDAG-NEXT:    v_subb_u32_e32 v29, vcc, v29, v39, vcc
2617; SDAG-NEXT:    v_add_i32_e32 v30, vcc, -1, v30
2618; SDAG-NEXT:    v_addc_u32_e32 v31, vcc, -1, v31, vcc
2619; SDAG-NEXT:    v_addc_u32_e32 v32, vcc, -1, v32, vcc
2620; SDAG-NEXT:    v_addc_u32_e32 v33, vcc, -1, v33, vcc
2621; SDAG-NEXT:    v_or_b32_e32 v24, v30, v32
2622; SDAG-NEXT:    v_or_b32_e32 v25, v31, v33
2623; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[24:25]
2624; SDAG-NEXT:    v_or_b32_e32 v19, v21, v19
2625; SDAG-NEXT:    s_or_b64 s[10:11], vcc, s[10:11]
2626; SDAG-NEXT:    v_or_b32_e32 v18, v20, v18
2627; SDAG-NEXT:    v_mov_b32_e32 v25, v17
2628; SDAG-NEXT:    v_mov_b32_e32 v24, v16
2629; SDAG-NEXT:    s_andn2_b64 exec, exec, s[10:11]
2630; SDAG-NEXT:    s_cbranch_execnz .LBB3_3
2631; SDAG-NEXT:  ; %bb.4: ; %Flow13
2632; SDAG-NEXT:    s_or_b64 exec, exec, s[10:11]
2633; SDAG-NEXT:  .LBB3_5: ; %Flow14
2634; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
2635; SDAG-NEXT:    v_lshl_b64 v[18:19], v[18:19], 1
2636; SDAG-NEXT:    v_lshrrev_b32_e32 v24, 31, v23
2637; SDAG-NEXT:    v_lshl_b64 v[22:23], v[22:23], 1
2638; SDAG-NEXT:    v_or_b32_e32 v18, v18, v24
2639; SDAG-NEXT:    v_or_b32_e32 v33, v21, v19
2640; SDAG-NEXT:    v_or_b32_e32 v30, v17, v23
2641; SDAG-NEXT:    v_or_b32_e32 v31, v20, v18
2642; SDAG-NEXT:    v_or_b32_e32 v32, v16, v22
2643; SDAG-NEXT:  .LBB3_6: ; %Flow16
2644; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
2645; SDAG-NEXT:    v_or_b32_e32 v17, v13, v15
2646; SDAG-NEXT:    v_or_b32_e32 v16, v12, v14
2647; SDAG-NEXT:    v_or_b32_e32 v19, v5, v7
2648; SDAG-NEXT:    v_or_b32_e32 v18, v4, v6
2649; SDAG-NEXT:    v_ffbh_u32_e32 v20, v14
2650; SDAG-NEXT:    v_ffbh_u32_e32 v21, v15
2651; SDAG-NEXT:    v_ffbh_u32_e32 v22, v12
2652; SDAG-NEXT:    v_ffbh_u32_e32 v23, v13
2653; SDAG-NEXT:    v_ffbh_u32_e32 v24, v6
2654; SDAG-NEXT:    v_ffbh_u32_e32 v25, v7
2655; SDAG-NEXT:    v_ffbh_u32_e32 v26, v4
2656; SDAG-NEXT:    v_ffbh_u32_e32 v27, v5
2657; SDAG-NEXT:    v_mov_b32_e32 v28, 0
2658; SDAG-NEXT:    s_mov_b64 s[8:9], 0x7f
2659; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[16:17]
2660; SDAG-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[18:19]
2661; SDAG-NEXT:    v_add_i32_e64 v16, s[6:7], 32, v20
2662; SDAG-NEXT:    v_add_i32_e64 v17, s[6:7], 32, v22
2663; SDAG-NEXT:    v_add_i32_e64 v18, s[6:7], 32, v24
2664; SDAG-NEXT:    v_add_i32_e64 v19, s[6:7], 32, v26
2665; SDAG-NEXT:    s_or_b64 s[6:7], vcc, s[4:5]
2666; SDAG-NEXT:    v_min_u32_e32 v16, v16, v21
2667; SDAG-NEXT:    v_min_u32_e32 v17, v17, v23
2668; SDAG-NEXT:    v_min_u32_e32 v18, v18, v25
2669; SDAG-NEXT:    v_min_u32_e32 v19, v19, v27
2670; SDAG-NEXT:    v_add_i32_e32 v17, vcc, 64, v17
2671; SDAG-NEXT:    v_addc_u32_e64 v20, s[4:5], 0, 0, vcc
2672; SDAG-NEXT:    v_add_i32_e32 v19, vcc, 64, v19
2673; SDAG-NEXT:    v_addc_u32_e64 v21, s[4:5], 0, 0, vcc
2674; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[14:15]
2675; SDAG-NEXT:    v_cndmask_b32_e64 v20, v20, 0, vcc
2676; SDAG-NEXT:    v_cndmask_b32_e32 v16, v17, v16, vcc
2677; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
2678; SDAG-NEXT:    v_cndmask_b32_e64 v17, v21, 0, vcc
2679; SDAG-NEXT:    v_cndmask_b32_e32 v18, v19, v18, vcc
2680; SDAG-NEXT:    v_sub_i32_e32 v16, vcc, v16, v18
2681; SDAG-NEXT:    v_subb_u32_e32 v17, vcc, v20, v17, vcc
2682; SDAG-NEXT:    v_xor_b32_e32 v18, 0x7f, v16
2683; SDAG-NEXT:    v_subbrev_u32_e32 v20, vcc, 0, v28, vcc
2684; SDAG-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[8:9], v[16:17]
2685; SDAG-NEXT:    v_cndmask_b32_e64 v22, 0, 1, s[4:5]
2686; SDAG-NEXT:    v_subbrev_u32_e32 v21, vcc, 0, v28, vcc
2687; SDAG-NEXT:    v_or_b32_e32 v18, v18, v20
2688; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[20:21]
2689; SDAG-NEXT:    v_cndmask_b32_e64 v23, 0, 1, vcc
2690; SDAG-NEXT:    v_or_b32_e32 v19, v17, v21
2691; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[20:21]
2692; SDAG-NEXT:    v_cndmask_b32_e32 v22, v23, v22, vcc
2693; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[18:19]
2694; SDAG-NEXT:    v_and_b32_e32 v18, 1, v22
2695; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 1, v18
2696; SDAG-NEXT:    s_or_b64 s[4:5], s[6:7], s[4:5]
2697; SDAG-NEXT:    v_cndmask_b32_e64 v23, v7, 0, s[4:5]
2698; SDAG-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
2699; SDAG-NEXT:    v_cndmask_b32_e64 v22, v6, 0, s[4:5]
2700; SDAG-NEXT:    v_cndmask_b32_e64 v19, v5, 0, s[4:5]
2701; SDAG-NEXT:    v_cndmask_b32_e64 v18, v4, 0, s[4:5]
2702; SDAG-NEXT:    s_and_b64 s[4:5], s[6:7], vcc
2703; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
2704; SDAG-NEXT:    s_cbranch_execz .LBB3_12
2705; SDAG-NEXT:  ; %bb.7: ; %udiv-bb1
2706; SDAG-NEXT:    v_add_i32_e32 v34, vcc, 1, v16
2707; SDAG-NEXT:    v_sub_i32_e64 v22, s[4:5], 63, v16
2708; SDAG-NEXT:    v_mov_b32_e32 v18, 0
2709; SDAG-NEXT:    v_mov_b32_e32 v19, 0
2710; SDAG-NEXT:    v_addc_u32_e32 v35, vcc, 0, v17, vcc
2711; SDAG-NEXT:    v_lshl_b64 v[22:23], v[4:5], v22
2712; SDAG-NEXT:    v_addc_u32_e32 v36, vcc, 0, v20, vcc
2713; SDAG-NEXT:    v_addc_u32_e32 v37, vcc, 0, v21, vcc
2714; SDAG-NEXT:    v_or_b32_e32 v20, v34, v36
2715; SDAG-NEXT:    v_sub_i32_e32 v26, vcc, 0x7f, v16
2716; SDAG-NEXT:    v_or_b32_e32 v21, v35, v37
2717; SDAG-NEXT:    v_lshl_b64 v[16:17], v[6:7], v26
2718; SDAG-NEXT:    v_sub_i32_e32 v27, vcc, 64, v26
2719; SDAG-NEXT:    v_lshl_b64 v[24:25], v[4:5], v26
2720; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[20:21]
2721; SDAG-NEXT:    v_lshr_b64 v[20:21], v[4:5], v27
2722; SDAG-NEXT:    v_or_b32_e32 v17, v17, v21
2723; SDAG-NEXT:    v_or_b32_e32 v16, v16, v20
2724; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v26
2725; SDAG-NEXT:    v_cndmask_b32_e64 v17, v23, v17, s[4:5]
2726; SDAG-NEXT:    v_cndmask_b32_e64 v16, v22, v16, s[4:5]
2727; SDAG-NEXT:    v_cndmask_b32_e64 v21, 0, v25, s[4:5]
2728; SDAG-NEXT:    v_cndmask_b32_e64 v20, 0, v24, s[4:5]
2729; SDAG-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v26
2730; SDAG-NEXT:    v_cndmask_b32_e64 v17, v17, v7, s[4:5]
2731; SDAG-NEXT:    v_cndmask_b32_e64 v16, v16, v6, s[4:5]
2732; SDAG-NEXT:    v_mov_b32_e32 v22, 0
2733; SDAG-NEXT:    v_mov_b32_e32 v23, 0
2734; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
2735; SDAG-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
2736; SDAG-NEXT:    s_cbranch_execz .LBB3_11
2737; SDAG-NEXT:  ; %bb.8: ; %udiv-preheader
2738; SDAG-NEXT:    v_lshr_b64 v[18:19], v[4:5], v34
2739; SDAG-NEXT:    v_sub_i32_e32 v28, vcc, 64, v34
2740; SDAG-NEXT:    v_subrev_i32_e32 v39, vcc, 64, v34
2741; SDAG-NEXT:    v_lshr_b64 v[26:27], v[6:7], v34
2742; SDAG-NEXT:    v_add_i32_e32 v38, vcc, -1, v12
2743; SDAG-NEXT:    s_mov_b64 s[10:11], 0
2744; SDAG-NEXT:    v_mov_b32_e32 v24, 0
2745; SDAG-NEXT:    v_mov_b32_e32 v25, 0
2746; SDAG-NEXT:    v_mov_b32_e32 v22, 0
2747; SDAG-NEXT:    v_mov_b32_e32 v23, 0
2748; SDAG-NEXT:    v_lshl_b64 v[28:29], v[6:7], v28
2749; SDAG-NEXT:    v_lshr_b64 v[49:50], v[6:7], v39
2750; SDAG-NEXT:    v_addc_u32_e32 v39, vcc, -1, v13, vcc
2751; SDAG-NEXT:    v_or_b32_e32 v19, v19, v29
2752; SDAG-NEXT:    v_or_b32_e32 v18, v18, v28
2753; SDAG-NEXT:    v_addc_u32_e32 v48, vcc, -1, v14, vcc
2754; SDAG-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v34
2755; SDAG-NEXT:    v_cndmask_b32_e64 v19, v50, v19, s[4:5]
2756; SDAG-NEXT:    v_cndmask_b32_e64 v18, v49, v18, s[4:5]
2757; SDAG-NEXT:    v_cndmask_b32_e64 v29, 0, v27, s[4:5]
2758; SDAG-NEXT:    v_cndmask_b32_e64 v28, 0, v26, s[4:5]
2759; SDAG-NEXT:    v_addc_u32_e32 v49, vcc, -1, v15, vcc
2760; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v34
2761; SDAG-NEXT:    v_cndmask_b32_e32 v27, v19, v5, vcc
2762; SDAG-NEXT:    v_cndmask_b32_e32 v26, v18, v4, vcc
2763; SDAG-NEXT:    v_mov_b32_e32 v19, 0
2764; SDAG-NEXT:  .LBB3_9: ; %udiv-do-while
2765; SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
2766; SDAG-NEXT:    v_lshl_b64 v[28:29], v[28:29], 1
2767; SDAG-NEXT:    v_lshrrev_b32_e32 v18, 31, v27
2768; SDAG-NEXT:    v_lshl_b64 v[26:27], v[26:27], 1
2769; SDAG-NEXT:    v_lshrrev_b32_e32 v50, 31, v17
2770; SDAG-NEXT:    v_lshl_b64 v[16:17], v[16:17], 1
2771; SDAG-NEXT:    v_lshrrev_b32_e32 v51, 31, v21
2772; SDAG-NEXT:    v_lshl_b64 v[20:21], v[20:21], 1
2773; SDAG-NEXT:    v_or_b32_e32 v18, v28, v18
2774; SDAG-NEXT:    v_or_b32_e32 v26, v26, v50
2775; SDAG-NEXT:    v_or_b32_e32 v16, v16, v51
2776; SDAG-NEXT:    v_or_b32_e32 v17, v23, v17
2777; SDAG-NEXT:    v_or_b32_e32 v21, v25, v21
2778; SDAG-NEXT:    v_sub_i32_e32 v25, vcc, v38, v26
2779; SDAG-NEXT:    v_or_b32_e32 v16, v22, v16
2780; SDAG-NEXT:    v_subb_u32_e32 v25, vcc, v39, v27, vcc
2781; SDAG-NEXT:    v_subb_u32_e32 v25, vcc, v48, v18, vcc
2782; SDAG-NEXT:    v_subb_u32_e32 v25, vcc, v49, v29, vcc
2783; SDAG-NEXT:    v_ashrrev_i32_e32 v25, 31, v25
2784; SDAG-NEXT:    v_and_b32_e32 v28, v25, v12
2785; SDAG-NEXT:    v_and_b32_e32 v50, v25, v13
2786; SDAG-NEXT:    v_and_b32_e32 v51, v25, v14
2787; SDAG-NEXT:    v_and_b32_e32 v52, v25, v15
2788; SDAG-NEXT:    v_sub_i32_e32 v26, vcc, v26, v28
2789; SDAG-NEXT:    v_subb_u32_e32 v27, vcc, v27, v50, vcc
2790; SDAG-NEXT:    v_subb_u32_e32 v28, vcc, v18, v51, vcc
2791; SDAG-NEXT:    v_subb_u32_e32 v29, vcc, v29, v52, vcc
2792; SDAG-NEXT:    v_add_i32_e32 v34, vcc, -1, v34
2793; SDAG-NEXT:    v_addc_u32_e32 v35, vcc, -1, v35, vcc
2794; SDAG-NEXT:    v_addc_u32_e32 v36, vcc, -1, v36, vcc
2795; SDAG-NEXT:    v_addc_u32_e32 v37, vcc, -1, v37, vcc
2796; SDAG-NEXT:    v_or_b32_e32 v50, v34, v36
2797; SDAG-NEXT:    v_or_b32_e32 v51, v35, v37
2798; SDAG-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[50:51]
2799; SDAG-NEXT:    v_and_b32_e32 v18, 1, v25
2800; SDAG-NEXT:    s_or_b64 s[10:11], vcc, s[10:11]
2801; SDAG-NEXT:    v_or_b32_e32 v20, v24, v20
2802; SDAG-NEXT:    v_mov_b32_e32 v25, v19
2803; SDAG-NEXT:    v_mov_b32_e32 v24, v18
2804; SDAG-NEXT:    s_andn2_b64 exec, exec, s[10:11]
2805; SDAG-NEXT:    s_cbranch_execnz .LBB3_9
2806; SDAG-NEXT:  ; %bb.10: ; %Flow
2807; SDAG-NEXT:    s_or_b64 exec, exec, s[10:11]
2808; SDAG-NEXT:  .LBB3_11: ; %Flow11
2809; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
2810; SDAG-NEXT:    v_lshl_b64 v[16:17], v[16:17], 1
2811; SDAG-NEXT:    v_lshrrev_b32_e32 v24, 31, v21
2812; SDAG-NEXT:    v_lshl_b64 v[20:21], v[20:21], 1
2813; SDAG-NEXT:    v_or_b32_e32 v16, v16, v24
2814; SDAG-NEXT:    v_or_b32_e32 v23, v23, v17
2815; SDAG-NEXT:    v_or_b32_e32 v19, v19, v21
2816; SDAG-NEXT:    v_or_b32_e32 v22, v22, v16
2817; SDAG-NEXT:    v_or_b32_e32 v18, v18, v20
2818; SDAG-NEXT:  .LBB3_12: ; %Flow12
2819; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
2820; SDAG-NEXT:    v_mul_lo_u32 v20, v32, v11
2821; SDAG-NEXT:    v_mad_u64_u32 v[16:17], s[4:5], v32, v10, 0
2822; SDAG-NEXT:    v_mul_lo_u32 v28, v30, v10
2823; SDAG-NEXT:    v_mul_lo_u32 v29, v33, v8
2824; SDAG-NEXT:    v_mul_lo_u32 v33, v31, v9
2825; SDAG-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v8, v32, 0
2826; SDAG-NEXT:    v_mov_b32_e32 v21, 0
2827; SDAG-NEXT:    v_mul_lo_u32 v34, v18, v15
2828; SDAG-NEXT:    v_mad_u64_u32 v[24:25], s[4:5], v18, v14, 0
2829; SDAG-NEXT:    v_mul_lo_u32 v35, v19, v14
2830; SDAG-NEXT:    v_mul_lo_u32 v23, v23, v12
2831; SDAG-NEXT:    v_mul_lo_u32 v36, v22, v13
2832; SDAG-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], v12, v18, 0
2833; SDAG-NEXT:    v_add_i32_e32 v17, vcc, v17, v20
2834; SDAG-NEXT:    v_mov_b32_e32 v20, v11
2835; SDAG-NEXT:    v_mad_u64_u32 v[26:27], s[4:5], v9, v32, v[20:21]
2836; SDAG-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
2837; SDAG-NEXT:    v_add_i32_e64 v20, s[4:5], v25, v34
2838; SDAG-NEXT:    v_add_i32_e64 v17, s[4:5], v17, v28
2839; SDAG-NEXT:    v_mov_b32_e32 v28, v27
2840; SDAG-NEXT:    v_mov_b32_e32 v27, v21
2841; SDAG-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v8, v30, v[26:27]
2842; SDAG-NEXT:    v_add_i32_e64 v25, s[4:5], v20, v35
2843; SDAG-NEXT:    v_mov_b32_e32 v20, v15
2844; SDAG-NEXT:    v_mad_u64_u32 v[26:27], s[4:5], v13, v18, v[20:21]
2845; SDAG-NEXT:    v_mad_u64_u32 v[15:16], s[4:5], v31, v8, v[16:17]
2846; SDAG-NEXT:    v_mov_b32_e32 v8, v11
2847; SDAG-NEXT:    v_add_i32_e64 v17, s[4:5], v28, v8
2848; SDAG-NEXT:    v_addc_u32_e64 v18, s[4:5], 0, 0, s[4:5]
2849; SDAG-NEXT:    v_mov_b32_e32 v8, v10
2850; SDAG-NEXT:    v_subb_u32_e32 v1, vcc, v1, v8, vcc
2851; SDAG-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v22, v12, v[24:25]
2852; SDAG-NEXT:    v_mov_b32_e32 v22, v27
2853; SDAG-NEXT:    v_mov_b32_e32 v27, v21
2854; SDAG-NEXT:    v_mad_u64_u32 v[20:21], s[4:5], v12, v19, v[26:27]
2855; SDAG-NEXT:    v_add_i32_e64 v16, s[4:5], v29, v16
2856; SDAG-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v9, v30, v[17:18]
2857; SDAG-NEXT:    v_add_i32_e64 v17, s[4:5], v23, v11
2858; SDAG-NEXT:    v_mov_b32_e32 v11, v21
2859; SDAG-NEXT:    v_add_i32_e64 v11, s[4:5], v22, v11
2860; SDAG-NEXT:    v_addc_u32_e64 v12, s[4:5], 0, 0, s[4:5]
2861; SDAG-NEXT:    v_add_i32_e64 v16, s[4:5], v33, v16
2862; SDAG-NEXT:    v_add_i32_e64 v17, s[4:5], v36, v17
2863; SDAG-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v13, v19, v[11:12]
2864; SDAG-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v15
2865; SDAG-NEXT:    v_addc_u32_e64 v9, s[4:5], v9, v16, s[4:5]
2866; SDAG-NEXT:    v_subb_u32_e32 v2, vcc, v2, v8, vcc
2867; SDAG-NEXT:    v_subb_u32_e32 v3, vcc, v3, v9, vcc
2868; SDAG-NEXT:    v_add_i32_e32 v8, vcc, v11, v10
2869; SDAG-NEXT:    v_addc_u32_e32 v9, vcc, v12, v17, vcc
2870; SDAG-NEXT:    v_mov_b32_e32 v10, v20
2871; SDAG-NEXT:    v_sub_i32_e32 v4, vcc, v4, v14
2872; SDAG-NEXT:    v_subb_u32_e32 v5, vcc, v5, v10, vcc
2873; SDAG-NEXT:    v_subb_u32_e32 v6, vcc, v6, v8, vcc
2874; SDAG-NEXT:    v_subb_u32_e32 v7, vcc, v7, v9, vcc
2875; SDAG-NEXT:    s_setpc_b64 s[30:31]
2876;
2877; GISEL-LABEL: v_urem_v2i128_vv:
2878; GISEL:       ; %bb.0: ; %_udiv-special-cases_udiv-special-cases
2879; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2880; GISEL-NEXT:    v_or_b32_e32 v16, v8, v10
2881; GISEL-NEXT:    v_or_b32_e32 v17, v9, v11
2882; GISEL-NEXT:    v_or_b32_e32 v18, v0, v2
2883; GISEL-NEXT:    v_or_b32_e32 v19, v1, v3
2884; GISEL-NEXT:    v_ffbh_u32_e32 v22, v9
2885; GISEL-NEXT:    v_ffbh_u32_e32 v23, v8
2886; GISEL-NEXT:    v_ffbh_u32_e32 v24, v11
2887; GISEL-NEXT:    v_ffbh_u32_e32 v25, v10
2888; GISEL-NEXT:    v_ffbh_u32_e32 v26, v1
2889; GISEL-NEXT:    v_ffbh_u32_e32 v27, v0
2890; GISEL-NEXT:    v_ffbh_u32_e32 v28, v2
2891; GISEL-NEXT:    v_ffbh_u32_e32 v29, v3
2892; GISEL-NEXT:    v_mov_b32_e32 v20, 0x7f
2893; GISEL-NEXT:    v_mov_b32_e32 v21, 0
2894; GISEL-NEXT:    s_mov_b64 s[8:9], 0
2895; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[16:17]
2896; GISEL-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[18:19]
2897; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], 32, v23
2898; GISEL-NEXT:    v_add_i32_e64 v17, s[6:7], 32, v25
2899; GISEL-NEXT:    v_add_i32_e64 v18, s[6:7], 32, v27
2900; GISEL-NEXT:    v_add_i32_e64 v19, s[6:7], 32, v28
2901; GISEL-NEXT:    v_min_u32_e32 v16, v22, v16
2902; GISEL-NEXT:    v_min_u32_e32 v17, v24, v17
2903; GISEL-NEXT:    v_min_u32_e32 v18, v26, v18
2904; GISEL-NEXT:    v_min_u32_e32 v19, v29, v19
2905; GISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
2906; GISEL-NEXT:    v_cndmask_b32_e64 v22, 0, 1, s[4:5]
2907; GISEL-NEXT:    v_add_i32_e32 v16, vcc, 64, v16
2908; GISEL-NEXT:    v_add_i32_e32 v18, vcc, 64, v18
2909; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
2910; GISEL-NEXT:    v_cndmask_b32_e32 v16, v17, v16, vcc
2911; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
2912; GISEL-NEXT:    v_cndmask_b32_e32 v17, v19, v18, vcc
2913; GISEL-NEXT:    v_sub_i32_e32 v18, vcc, v16, v17
2914; GISEL-NEXT:    v_subb_u32_e64 v19, s[4:5], 0, 0, vcc
2915; GISEL-NEXT:    v_subb_u32_e64 v16, s[4:5], 0, 0, s[4:5]
2916; GISEL-NEXT:    v_subb_u32_e64 v17, s[4:5], 0, 0, s[4:5]
2917; GISEL-NEXT:    v_xor_b32_e32 v23, 0x7f, v18
2918; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[18:19], v[20:21]
2919; GISEL-NEXT:    v_cndmask_b32_e64 v24, 0, 1, vcc
2920; GISEL-NEXT:    v_or_b32_e32 v20, v23, v16
2921; GISEL-NEXT:    v_or_b32_e32 v21, v19, v17
2922; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[16:17]
2923; GISEL-NEXT:    v_cndmask_b32_e64 v23, 0, 1, vcc
2924; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[16:17]
2925; GISEL-NEXT:    v_cndmask_b32_e32 v23, v23, v24, vcc
2926; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[20:21]
2927; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
2928; GISEL-NEXT:    v_or_b32_e32 v21, v22, v23
2929; GISEL-NEXT:    v_or_b32_e32 v20, v21, v20
2930; GISEL-NEXT:    v_and_b32_e32 v21, 1, v21
2931; GISEL-NEXT:    v_and_b32_e32 v20, 1, v20
2932; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v21
2933; GISEL-NEXT:    v_cndmask_b32_e64 v32, v0, 0, vcc
2934; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v20
2935; GISEL-NEXT:    v_cndmask_b32_e64 v20, v2, 0, vcc
2936; GISEL-NEXT:    v_cndmask_b32_e64 v21, v3, 0, vcc
2937; GISEL-NEXT:    s_xor_b64 s[4:5], s[4:5], -1
2938; GISEL-NEXT:    v_cndmask_b32_e64 v33, v1, 0, vcc
2939; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
2940; GISEL-NEXT:    s_cbranch_execz .LBB3_6
2941; GISEL-NEXT:  ; %bb.1: ; %udiv-bb15
2942; GISEL-NEXT:    v_add_i32_e32 v30, vcc, 1, v18
2943; GISEL-NEXT:    v_addc_u32_e64 v31, s[4:5], 0, v19, vcc
2944; GISEL-NEXT:    v_sub_i32_e32 v26, vcc, 0x7f, v18
2945; GISEL-NEXT:    v_not_b32_e32 v18, 63
2946; GISEL-NEXT:    v_addc_u32_e64 v32, vcc, 0, v16, s[4:5]
2947; GISEL-NEXT:    v_addc_u32_e32 v33, vcc, 0, v17, vcc
2948; GISEL-NEXT:    v_add_i32_e64 v22, s[4:5], v26, v18
2949; GISEL-NEXT:    v_sub_i32_e64 v20, s[4:5], 64, v26
2950; GISEL-NEXT:    v_lshl_b64 v[16:17], v[0:1], v26
2951; GISEL-NEXT:    v_lshl_b64 v[18:19], v[2:3], v26
2952; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
2953; GISEL-NEXT:    v_lshr_b64 v[20:21], v[0:1], v20
2954; GISEL-NEXT:    v_lshl_b64 v[24:25], v[0:1], v22
2955; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v26
2956; GISEL-NEXT:    v_cndmask_b32_e32 v22, 0, v16, vcc
2957; GISEL-NEXT:    v_cndmask_b32_e32 v23, 0, v17, vcc
2958; GISEL-NEXT:    v_or_b32_e32 v16, v20, v18
2959; GISEL-NEXT:    v_or_b32_e32 v17, v21, v19
2960; GISEL-NEXT:    v_cndmask_b32_e32 v16, v24, v16, vcc
2961; GISEL-NEXT:    v_cndmask_b32_e32 v17, v25, v17, vcc
2962; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v26
2963; GISEL-NEXT:    v_cndmask_b32_e32 v20, v16, v2, vcc
2964; GISEL-NEXT:    v_cndmask_b32_e32 v21, v17, v3, vcc
2965; GISEL-NEXT:    s_mov_b64 s[10:11], s[8:9]
2966; GISEL-NEXT:    v_mov_b32_e32 v19, s11
2967; GISEL-NEXT:    v_mov_b32_e32 v18, s10
2968; GISEL-NEXT:    v_mov_b32_e32 v17, s9
2969; GISEL-NEXT:    v_mov_b32_e32 v16, s8
2970; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
2971; GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[6:7]
2972; GISEL-NEXT:    s_cbranch_execz .LBB3_5
2973; GISEL-NEXT:  ; %bb.2: ; %udiv-preheader4
2974; GISEL-NEXT:    v_add_i32_e32 v26, vcc, 0xffffffc0, v30
2975; GISEL-NEXT:    v_sub_i32_e32 v24, vcc, 64, v30
2976; GISEL-NEXT:    v_lshr_b64 v[16:17], v[2:3], v30
2977; GISEL-NEXT:    v_lshr_b64 v[18:19], v[0:1], v30
2978; GISEL-NEXT:    s_mov_b64 s[4:5], 0
2979; GISEL-NEXT:    v_add_i32_e32 v34, vcc, -1, v8
2980; GISEL-NEXT:    v_addc_u32_e32 v35, vcc, -1, v9, vcc
2981; GISEL-NEXT:    v_lshl_b64 v[24:25], v[2:3], v24
2982; GISEL-NEXT:    v_lshr_b64 v[26:27], v[2:3], v26
2983; GISEL-NEXT:    v_addc_u32_e32 v36, vcc, -1, v10, vcc
2984; GISEL-NEXT:    v_addc_u32_e32 v37, vcc, -1, v11, vcc
2985; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
2986; GISEL-NEXT:    v_or_b32_e32 v18, v18, v24
2987; GISEL-NEXT:    v_or_b32_e32 v19, v19, v25
2988; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v30
2989; GISEL-NEXT:    v_cndmask_b32_e32 v18, v26, v18, vcc
2990; GISEL-NEXT:    v_cndmask_b32_e32 v19, v27, v19, vcc
2991; GISEL-NEXT:    v_cndmask_b32_e32 v26, 0, v16, vcc
2992; GISEL-NEXT:    v_cndmask_b32_e32 v27, 0, v17, vcc
2993; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v30
2994; GISEL-NEXT:    v_cndmask_b32_e32 v28, v18, v0, vcc
2995; GISEL-NEXT:    v_cndmask_b32_e32 v29, v19, v1, vcc
2996; GISEL-NEXT:    v_mov_b32_e32 v25, 0
2997; GISEL-NEXT:    v_mov_b32_e32 v19, s7
2998; GISEL-NEXT:    v_mov_b32_e32 v18, s6
2999; GISEL-NEXT:    v_mov_b32_e32 v17, s5
3000; GISEL-NEXT:    v_mov_b32_e32 v16, s4
3001; GISEL-NEXT:  .LBB3_3: ; %udiv-do-while3
3002; GISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
3003; GISEL-NEXT:    v_lshrrev_b32_e32 v38, 31, v23
3004; GISEL-NEXT:    v_lshl_b64 v[18:19], v[22:23], 1
3005; GISEL-NEXT:    v_lshrrev_b32_e32 v24, 31, v29
3006; GISEL-NEXT:    v_lshl_b64 v[28:29], v[28:29], 1
3007; GISEL-NEXT:    v_lshl_b64 v[26:27], v[26:27], 1
3008; GISEL-NEXT:    v_lshrrev_b32_e32 v39, 31, v21
3009; GISEL-NEXT:    v_add_i32_e32 v30, vcc, -1, v30
3010; GISEL-NEXT:    v_addc_u32_e32 v31, vcc, -1, v31, vcc
3011; GISEL-NEXT:    v_lshl_b64 v[20:21], v[20:21], 1
3012; GISEL-NEXT:    v_or_b32_e32 v22, v16, v18
3013; GISEL-NEXT:    v_or_b32_e32 v23, v17, v19
3014; GISEL-NEXT:    v_or_b32_e32 v18, v26, v24
3015; GISEL-NEXT:    v_or_b32_e32 v19, v28, v39
3016; GISEL-NEXT:    v_addc_u32_e32 v32, vcc, -1, v32, vcc
3017; GISEL-NEXT:    v_addc_u32_e32 v33, vcc, -1, v33, vcc
3018; GISEL-NEXT:    v_sub_i32_e32 v16, vcc, v34, v19
3019; GISEL-NEXT:    v_subb_u32_e32 v16, vcc, v35, v29, vcc
3020; GISEL-NEXT:    v_or_b32_e32 v16, v30, v32
3021; GISEL-NEXT:    v_or_b32_e32 v17, v31, v33
3022; GISEL-NEXT:    v_subb_u32_e32 v24, vcc, v36, v18, vcc
3023; GISEL-NEXT:    v_subb_u32_e32 v24, vcc, v37, v27, vcc
3024; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[16:17]
3025; GISEL-NEXT:    v_ashrrev_i32_e32 v16, 31, v24
3026; GISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
3027; GISEL-NEXT:    v_and_b32_e32 v17, v16, v8
3028; GISEL-NEXT:    v_and_b32_e32 v26, v16, v9
3029; GISEL-NEXT:    v_and_b32_e32 v39, v16, v10
3030; GISEL-NEXT:    v_and_b32_e32 v48, v16, v11
3031; GISEL-NEXT:    v_and_b32_e32 v24, 1, v16
3032; GISEL-NEXT:    v_sub_i32_e32 v28, vcc, v19, v17
3033; GISEL-NEXT:    v_subb_u32_e32 v29, vcc, v29, v26, vcc
3034; GISEL-NEXT:    v_subb_u32_e32 v26, vcc, v18, v39, vcc
3035; GISEL-NEXT:    v_subb_u32_e32 v27, vcc, v27, v48, vcc
3036; GISEL-NEXT:    v_or_b32_e32 v20, v20, v38
3037; GISEL-NEXT:    v_mov_b32_e32 v16, v24
3038; GISEL-NEXT:    v_mov_b32_e32 v17, v25
3039; GISEL-NEXT:    s_andn2_b64 exec, exec, s[4:5]
3040; GISEL-NEXT:    s_cbranch_execnz .LBB3_3
3041; GISEL-NEXT:  ; %bb.4: ; %Flow13
3042; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
3043; GISEL-NEXT:  .LBB3_5: ; %Flow14
3044; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
3045; GISEL-NEXT:    v_lshl_b64 v[18:19], v[22:23], 1
3046; GISEL-NEXT:    v_lshl_b64 v[20:21], v[20:21], 1
3047; GISEL-NEXT:    v_lshrrev_b32_e32 v22, 31, v23
3048; GISEL-NEXT:    v_or_b32_e32 v20, v20, v22
3049; GISEL-NEXT:    v_or_b32_e32 v32, v16, v18
3050; GISEL-NEXT:    v_or_b32_e32 v33, v17, v19
3051; GISEL-NEXT:  .LBB3_6: ; %Flow16
3052; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
3053; GISEL-NEXT:    s_mov_b64 s[8:9], 0
3054; GISEL-NEXT:    v_or_b32_e32 v16, v12, v14
3055; GISEL-NEXT:    v_or_b32_e32 v17, v13, v15
3056; GISEL-NEXT:    v_or_b32_e32 v18, v4, v6
3057; GISEL-NEXT:    v_or_b32_e32 v19, v5, v7
3058; GISEL-NEXT:    v_ffbh_u32_e32 v22, v13
3059; GISEL-NEXT:    v_ffbh_u32_e32 v23, v12
3060; GISEL-NEXT:    v_ffbh_u32_e32 v26, v15
3061; GISEL-NEXT:    v_ffbh_u32_e32 v27, v14
3062; GISEL-NEXT:    v_ffbh_u32_e32 v28, v5
3063; GISEL-NEXT:    v_ffbh_u32_e32 v29, v4
3064; GISEL-NEXT:    v_ffbh_u32_e32 v30, v7
3065; GISEL-NEXT:    v_ffbh_u32_e32 v31, v6
3066; GISEL-NEXT:    v_mov_b32_e32 v24, 0x7f
3067; GISEL-NEXT:    v_mov_b32_e32 v25, 0
3068; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[16:17]
3069; GISEL-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[18:19]
3070; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], 32, v23
3071; GISEL-NEXT:    v_add_i32_e64 v17, s[6:7], 32, v27
3072; GISEL-NEXT:    v_add_i32_e64 v18, s[6:7], 32, v29
3073; GISEL-NEXT:    v_add_i32_e64 v19, s[6:7], 32, v31
3074; GISEL-NEXT:    v_min_u32_e32 v16, v22, v16
3075; GISEL-NEXT:    v_min_u32_e32 v17, v26, v17
3076; GISEL-NEXT:    v_min_u32_e32 v18, v28, v18
3077; GISEL-NEXT:    v_min_u32_e32 v19, v30, v19
3078; GISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
3079; GISEL-NEXT:    v_cndmask_b32_e64 v26, 0, 1, s[4:5]
3080; GISEL-NEXT:    v_add_i32_e32 v16, vcc, 64, v16
3081; GISEL-NEXT:    v_add_i32_e32 v18, vcc, 64, v18
3082; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
3083; GISEL-NEXT:    v_cndmask_b32_e32 v16, v17, v16, vcc
3084; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[6:7]
3085; GISEL-NEXT:    v_cndmask_b32_e32 v17, v19, v18, vcc
3086; GISEL-NEXT:    v_sub_i32_e32 v22, vcc, v16, v17
3087; GISEL-NEXT:    v_subb_u32_e64 v23, s[4:5], 0, 0, vcc
3088; GISEL-NEXT:    v_subb_u32_e64 v16, s[4:5], 0, 0, s[4:5]
3089; GISEL-NEXT:    v_subb_u32_e64 v17, s[4:5], 0, 0, s[4:5]
3090; GISEL-NEXT:    v_cmp_gt_u64_e32 vcc, v[22:23], v[24:25]
3091; GISEL-NEXT:    v_cndmask_b32_e64 v24, 0, 1, vcc
3092; GISEL-NEXT:    v_xor_b32_e32 v18, 0x7f, v22
3093; GISEL-NEXT:    v_cmp_lt_u64_e32 vcc, 0, v[16:17]
3094; GISEL-NEXT:    v_cndmask_b32_e64 v25, 0, 1, vcc
3095; GISEL-NEXT:    v_or_b32_e32 v18, v18, v16
3096; GISEL-NEXT:    v_or_b32_e32 v19, v23, v17
3097; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[16:17]
3098; GISEL-NEXT:    v_cndmask_b32_e32 v24, v25, v24, vcc
3099; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[18:19]
3100; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
3101; GISEL-NEXT:    v_or_b32_e32 v19, v26, v24
3102; GISEL-NEXT:    v_and_b32_e32 v24, 1, v19
3103; GISEL-NEXT:    v_or_b32_e32 v18, v19, v18
3104; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v24
3105; GISEL-NEXT:    v_cndmask_b32_e64 v24, v4, 0, vcc
3106; GISEL-NEXT:    v_and_b32_e32 v26, 1, v18
3107; GISEL-NEXT:    v_cndmask_b32_e64 v25, v5, 0, vcc
3108; GISEL-NEXT:    v_cndmask_b32_e64 v18, v6, 0, vcc
3109; GISEL-NEXT:    v_cndmask_b32_e64 v19, v7, 0, vcc
3110; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v26
3111; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
3112; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], s[4:5]
3113; GISEL-NEXT:    s_cbranch_execz .LBB3_12
3114; GISEL-NEXT:  ; %bb.7: ; %udiv-bb1
3115; GISEL-NEXT:    v_add_i32_e32 v34, vcc, 1, v22
3116; GISEL-NEXT:    v_addc_u32_e64 v35, s[4:5], 0, v23, vcc
3117; GISEL-NEXT:    v_sub_i32_e32 v28, vcc, 0x7f, v22
3118; GISEL-NEXT:    v_not_b32_e32 v18, 63
3119; GISEL-NEXT:    v_addc_u32_e64 v36, vcc, 0, v16, s[4:5]
3120; GISEL-NEXT:    v_addc_u32_e32 v37, vcc, 0, v17, vcc
3121; GISEL-NEXT:    v_add_i32_e64 v24, s[4:5], v28, v18
3122; GISEL-NEXT:    v_sub_i32_e64 v22, s[4:5], 64, v28
3123; GISEL-NEXT:    v_lshl_b64 v[16:17], v[4:5], v28
3124; GISEL-NEXT:    v_lshl_b64 v[18:19], v[6:7], v28
3125; GISEL-NEXT:    s_xor_b64 s[4:5], vcc, -1
3126; GISEL-NEXT:    v_lshr_b64 v[22:23], v[4:5], v22
3127; GISEL-NEXT:    v_lshl_b64 v[26:27], v[4:5], v24
3128; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v28
3129; GISEL-NEXT:    v_cndmask_b32_e32 v24, 0, v16, vcc
3130; GISEL-NEXT:    v_cndmask_b32_e32 v25, 0, v17, vcc
3131; GISEL-NEXT:    v_or_b32_e32 v16, v22, v18
3132; GISEL-NEXT:    v_or_b32_e32 v17, v23, v19
3133; GISEL-NEXT:    v_cndmask_b32_e32 v16, v26, v16, vcc
3134; GISEL-NEXT:    v_cndmask_b32_e32 v17, v27, v17, vcc
3135; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v28
3136; GISEL-NEXT:    v_cndmask_b32_e32 v22, v16, v6, vcc
3137; GISEL-NEXT:    v_cndmask_b32_e32 v23, v17, v7, vcc
3138; GISEL-NEXT:    s_mov_b64 s[10:11], s[8:9]
3139; GISEL-NEXT:    v_mov_b32_e32 v19, s11
3140; GISEL-NEXT:    v_mov_b32_e32 v18, s10
3141; GISEL-NEXT:    v_mov_b32_e32 v17, s9
3142; GISEL-NEXT:    v_mov_b32_e32 v16, s8
3143; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
3144; GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[6:7]
3145; GISEL-NEXT:    s_cbranch_execz .LBB3_11
3146; GISEL-NEXT:  ; %bb.8: ; %udiv-preheader
3147; GISEL-NEXT:    v_add_i32_e32 v28, vcc, 0xffffffc0, v34
3148; GISEL-NEXT:    v_sub_i32_e32 v26, vcc, 64, v34
3149; GISEL-NEXT:    v_lshr_b64 v[16:17], v[6:7], v34
3150; GISEL-NEXT:    v_lshr_b64 v[18:19], v[4:5], v34
3151; GISEL-NEXT:    s_mov_b64 s[4:5], 0
3152; GISEL-NEXT:    v_add_i32_e32 v38, vcc, -1, v12
3153; GISEL-NEXT:    v_addc_u32_e32 v39, vcc, -1, v13, vcc
3154; GISEL-NEXT:    v_lshl_b64 v[26:27], v[6:7], v26
3155; GISEL-NEXT:    v_lshr_b64 v[28:29], v[6:7], v28
3156; GISEL-NEXT:    v_addc_u32_e32 v48, vcc, -1, v14, vcc
3157; GISEL-NEXT:    v_addc_u32_e32 v49, vcc, -1, v15, vcc
3158; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
3159; GISEL-NEXT:    v_or_b32_e32 v18, v18, v26
3160; GISEL-NEXT:    v_or_b32_e32 v19, v19, v27
3161; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v34
3162; GISEL-NEXT:    v_cndmask_b32_e32 v18, v28, v18, vcc
3163; GISEL-NEXT:    v_cndmask_b32_e32 v19, v29, v19, vcc
3164; GISEL-NEXT:    v_cndmask_b32_e32 v30, 0, v16, vcc
3165; GISEL-NEXT:    v_cndmask_b32_e32 v31, 0, v17, vcc
3166; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v34
3167; GISEL-NEXT:    v_cndmask_b32_e32 v28, v18, v4, vcc
3168; GISEL-NEXT:    v_cndmask_b32_e32 v29, v19, v5, vcc
3169; GISEL-NEXT:    v_mov_b32_e32 v27, 0
3170; GISEL-NEXT:    v_mov_b32_e32 v19, s7
3171; GISEL-NEXT:    v_mov_b32_e32 v18, s6
3172; GISEL-NEXT:    v_mov_b32_e32 v17, s5
3173; GISEL-NEXT:    v_mov_b32_e32 v16, s4
3174; GISEL-NEXT:  .LBB3_9: ; %udiv-do-while
3175; GISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
3176; GISEL-NEXT:    v_lshl_b64 v[18:19], v[24:25], 1
3177; GISEL-NEXT:    v_lshrrev_b32_e32 v26, 31, v25
3178; GISEL-NEXT:    v_lshl_b64 v[50:51], v[28:29], 1
3179; GISEL-NEXT:    v_lshl_b64 v[30:31], v[30:31], 1
3180; GISEL-NEXT:    v_lshrrev_b32_e32 v28, 31, v29
3181; GISEL-NEXT:    v_lshrrev_b32_e32 v29, 31, v23
3182; GISEL-NEXT:    v_lshl_b64 v[22:23], v[22:23], 1
3183; GISEL-NEXT:    v_add_i32_e32 v34, vcc, -1, v34
3184; GISEL-NEXT:    v_addc_u32_e32 v35, vcc, -1, v35, vcc
3185; GISEL-NEXT:    v_or_b32_e32 v24, v16, v18
3186; GISEL-NEXT:    v_or_b32_e32 v25, v17, v19
3187; GISEL-NEXT:    v_or_b32_e32 v18, v30, v28
3188; GISEL-NEXT:    v_or_b32_e32 v19, v50, v29
3189; GISEL-NEXT:    v_or_b32_e32 v22, v22, v26
3190; GISEL-NEXT:    v_addc_u32_e32 v36, vcc, -1, v36, vcc
3191; GISEL-NEXT:    v_addc_u32_e32 v37, vcc, -1, v37, vcc
3192; GISEL-NEXT:    v_sub_i32_e32 v16, vcc, v38, v19
3193; GISEL-NEXT:    v_subb_u32_e32 v16, vcc, v39, v51, vcc
3194; GISEL-NEXT:    v_or_b32_e32 v16, v34, v36
3195; GISEL-NEXT:    v_or_b32_e32 v17, v35, v37
3196; GISEL-NEXT:    v_subb_u32_e32 v26, vcc, v48, v18, vcc
3197; GISEL-NEXT:    v_subb_u32_e32 v26, vcc, v49, v31, vcc
3198; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[16:17]
3199; GISEL-NEXT:    v_ashrrev_i32_e32 v16, 31, v26
3200; GISEL-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
3201; GISEL-NEXT:    v_and_b32_e32 v26, 1, v16
3202; GISEL-NEXT:    v_and_b32_e32 v17, v16, v12
3203; GISEL-NEXT:    v_and_b32_e32 v29, v16, v13
3204; GISEL-NEXT:    v_and_b32_e32 v30, v16, v14
3205; GISEL-NEXT:    v_and_b32_e32 v50, v16, v15
3206; GISEL-NEXT:    v_sub_i32_e32 v28, vcc, v19, v17
3207; GISEL-NEXT:    v_subb_u32_e32 v29, vcc, v51, v29, vcc
3208; GISEL-NEXT:    v_mov_b32_e32 v16, v26
3209; GISEL-NEXT:    v_mov_b32_e32 v17, v27
3210; GISEL-NEXT:    v_subb_u32_e32 v30, vcc, v18, v30, vcc
3211; GISEL-NEXT:    v_subb_u32_e32 v31, vcc, v31, v50, vcc
3212; GISEL-NEXT:    s_andn2_b64 exec, exec, s[4:5]
3213; GISEL-NEXT:    s_cbranch_execnz .LBB3_9
3214; GISEL-NEXT:  ; %bb.10: ; %Flow
3215; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
3216; GISEL-NEXT:  .LBB3_11: ; %Flow11
3217; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
3218; GISEL-NEXT:    v_lshl_b64 v[26:27], v[24:25], 1
3219; GISEL-NEXT:    v_lshl_b64 v[18:19], v[22:23], 1
3220; GISEL-NEXT:    v_lshrrev_b32_e32 v22, 31, v25
3221; GISEL-NEXT:    v_or_b32_e32 v18, v18, v22
3222; GISEL-NEXT:    v_or_b32_e32 v24, v16, v26
3223; GISEL-NEXT:    v_or_b32_e32 v25, v17, v27
3224; GISEL-NEXT:  .LBB3_12: ; %Flow12
3225; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
3226; GISEL-NEXT:    v_mad_u64_u32 v[16:17], s[4:5], v8, v32, 0
3227; GISEL-NEXT:    v_mad_u64_u32 v[22:23], s[4:5], v8, v20, 0
3228; GISEL-NEXT:    v_mul_lo_u32 v28, v8, v21
3229; GISEL-NEXT:    v_mul_lo_u32 v29, v9, v20
3230; GISEL-NEXT:    v_mad_u64_u32 v[20:21], s[4:5], v12, v24, 0
3231; GISEL-NEXT:    v_mad_u64_u32 v[26:27], s[4:5], v12, v18, 0
3232; GISEL-NEXT:    v_mul_lo_u32 v30, v12, v19
3233; GISEL-NEXT:    v_mul_lo_u32 v31, v13, v18
3234; GISEL-NEXT:    v_mad_u64_u32 v[18:19], s[4:5], v9, v33, v[22:23]
3235; GISEL-NEXT:    v_mad_u64_u32 v[22:23], s[4:5], v13, v25, v[26:27]
3236; GISEL-NEXT:    v_mad_u64_u32 v[18:19], s[4:5], v10, v32, v[18:19]
3237; GISEL-NEXT:    v_mad_u64_u32 v[22:23], s[4:5], v14, v24, v[22:23]
3238; GISEL-NEXT:    v_mad_u64_u32 v[17:18], vcc, v8, v33, v[17:18]
3239; GISEL-NEXT:    v_mad_u64_u32 v[21:22], s[4:5], v12, v25, v[21:22]
3240; GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[6:7], v9, v32, v[17:18]
3241; GISEL-NEXT:    v_addc_u32_e64 v17, s[6:7], v19, v28, s[6:7]
3242; GISEL-NEXT:    v_mad_u64_u32 v[12:13], s[6:7], v13, v24, v[21:22]
3243; GISEL-NEXT:    v_addc_u32_e64 v18, s[6:7], v23, v30, s[6:7]
3244; GISEL-NEXT:    v_addc_u32_e32 v17, vcc, v17, v29, vcc
3245; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v16
3246; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v8, vcc
3247; GISEL-NEXT:    v_addc_u32_e64 v8, s[4:5], v18, v31, s[4:5]
3248; GISEL-NEXT:    v_sub_i32_e64 v4, s[4:5], v4, v20
3249; GISEL-NEXT:    v_subb_u32_e64 v5, s[4:5], v5, v12, s[4:5]
3250; GISEL-NEXT:    v_mad_u64_u32 v[16:17], s[6:7], v10, v33, v[17:18]
3251; GISEL-NEXT:    v_mad_u64_u32 v[18:19], s[6:7], v14, v25, v[8:9]
3252; GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[6:7], v11, v32, v[16:17]
3253; GISEL-NEXT:    v_mad_u64_u32 v[11:12], s[6:7], v15, v24, v[18:19]
3254; GISEL-NEXT:    v_subb_u32_e32 v2, vcc, v2, v9, vcc
3255; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v10, vcc
3256; GISEL-NEXT:    v_subb_u32_e64 v6, vcc, v6, v13, s[4:5]
3257; GISEL-NEXT:    v_subb_u32_e32 v7, vcc, v7, v11, vcc
3258; GISEL-NEXT:    s_setpc_b64 s[30:31]
3259  %shl = urem <2 x i128> %lhs, %rhs
3260  ret <2 x i128> %shl
3261}
3262