xref: /llvm-project/llvm/test/CodeGen/AMDGPU/rem_i128.ll (revision 6206f5444fc0732e6495703c75a67f1f90f5b418)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
3; RUN: llc -O0 -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GFX9-O0,GFX9-SDAG-O0 %s
4
5; FIXME: GlobalISel missing the power-of-2 cases in legalization. https://github.com/llvm/llvm-project/issues/80671
6; xUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
7; xUN: llc -O0 -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GFX9-O0,GFX9-GISEL-O0 %s
8
9define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
10; GFX9-LABEL: v_srem_i128_vv:
11; GFX9:       ; %bb.0: ; %_udiv-special-cases
12; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13; GFX9-NEXT:    v_sub_co_u32_e32 v8, vcc, 0, v0
14; GFX9-NEXT:    v_subb_co_u32_e32 v9, vcc, 0, v1, vcc
15; GFX9-NEXT:    v_subb_co_u32_e32 v10, vcc, 0, v2, vcc
16; GFX9-NEXT:    v_subb_co_u32_e32 v11, vcc, 0, v3, vcc
17; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[2:3]
18; GFX9-NEXT:    v_ashrrev_i32_e32 v20, 31, v3
19; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
20; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
21; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc
22; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
23; GFX9-NEXT:    v_sub_co_u32_e32 v8, vcc, 0, v4
24; GFX9-NEXT:    v_subb_co_u32_e32 v9, vcc, 0, v5, vcc
25; GFX9-NEXT:    v_subb_co_u32_e32 v10, vcc, 0, v6, vcc
26; GFX9-NEXT:    v_subb_co_u32_e32 v11, vcc, 0, v7, vcc
27; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[6:7]
28; GFX9-NEXT:    v_mov_b32_e32 v21, v20
29; GFX9-NEXT:    v_cndmask_b32_e32 v22, v5, v9, vcc
30; GFX9-NEXT:    v_cndmask_b32_e32 v23, v4, v8, vcc
31; GFX9-NEXT:    v_cndmask_b32_e32 v5, v7, v11, vcc
32; GFX9-NEXT:    v_cndmask_b32_e32 v4, v6, v10, vcc
33; GFX9-NEXT:    v_or_b32_e32 v7, v22, v5
34; GFX9-NEXT:    v_or_b32_e32 v6, v23, v4
35; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[6:7]
36; GFX9-NEXT:    v_or_b32_e32 v7, v1, v3
37; GFX9-NEXT:    v_or_b32_e32 v6, v0, v2
38; GFX9-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[6:7]
39; GFX9-NEXT:    v_ffbh_u32_e32 v6, v4
40; GFX9-NEXT:    v_add_u32_e32 v6, 32, v6
41; GFX9-NEXT:    v_ffbh_u32_e32 v7, v5
42; GFX9-NEXT:    v_min_u32_e32 v6, v6, v7
43; GFX9-NEXT:    v_ffbh_u32_e32 v7, v23
44; GFX9-NEXT:    v_add_u32_e32 v7, 32, v7
45; GFX9-NEXT:    v_ffbh_u32_e32 v8, v22
46; GFX9-NEXT:    v_min_u32_e32 v7, v7, v8
47; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
48; GFX9-NEXT:    v_add_co_u32_e32 v7, vcc, 64, v7
49; GFX9-NEXT:    v_addc_co_u32_e64 v8, s[6:7], 0, 0, vcc
50; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
51; GFX9-NEXT:    v_ffbh_u32_e32 v10, v3
52; GFX9-NEXT:    v_cndmask_b32_e32 v6, v7, v6, vcc
53; GFX9-NEXT:    v_ffbh_u32_e32 v7, v2
54; GFX9-NEXT:    v_add_u32_e32 v7, 32, v7
55; GFX9-NEXT:    v_min_u32_e32 v7, v7, v10
56; GFX9-NEXT:    v_ffbh_u32_e32 v10, v0
57; GFX9-NEXT:    v_add_u32_e32 v10, 32, v10
58; GFX9-NEXT:    v_ffbh_u32_e32 v11, v1
59; GFX9-NEXT:    v_min_u32_e32 v10, v10, v11
60; GFX9-NEXT:    v_cndmask_b32_e64 v8, v8, 0, vcc
61; GFX9-NEXT:    v_add_co_u32_e32 v10, vcc, 64, v10
62; GFX9-NEXT:    v_addc_co_u32_e64 v11, s[6:7], 0, 0, vcc
63; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
64; GFX9-NEXT:    v_mov_b32_e32 v9, 0
65; GFX9-NEXT:    v_cndmask_b32_e32 v7, v10, v7, vcc
66; GFX9-NEXT:    v_cndmask_b32_e64 v11, v11, 0, vcc
67; GFX9-NEXT:    v_sub_co_u32_e32 v6, vcc, v6, v7
68; GFX9-NEXT:    v_subb_co_u32_e32 v7, vcc, v8, v11, vcc
69; GFX9-NEXT:    v_subbrev_co_u32_e32 v8, vcc, 0, v9, vcc
70; GFX9-NEXT:    v_subbrev_co_u32_e32 v9, vcc, 0, v9, vcc
71; GFX9-NEXT:    s_mov_b64 s[6:7], 0x7f
72; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[6:7]
73; GFX9-NEXT:    v_or_b32_e32 v12, v7, v9
74; GFX9-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
75; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[8:9]
76; GFX9-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
77; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[8:9]
78; GFX9-NEXT:    v_cndmask_b32_e32 v10, v11, v10, vcc
79; GFX9-NEXT:    v_and_b32_e32 v10, 1, v10
80; GFX9-NEXT:    v_xor_b32_e32 v11, 0x7f, v6
81; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v10
82; GFX9-NEXT:    v_or_b32_e32 v11, v11, v8
83; GFX9-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
84; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[11:12]
85; GFX9-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
86; GFX9-NEXT:    v_cndmask_b32_e64 v10, v3, 0, s[4:5]
87; GFX9-NEXT:    v_cndmask_b32_e64 v12, v2, 0, s[4:5]
88; GFX9-NEXT:    v_cndmask_b32_e64 v11, v1, 0, s[4:5]
89; GFX9-NEXT:    v_cndmask_b32_e64 v13, v0, 0, s[4:5]
90; GFX9-NEXT:    s_and_b64 s[4:5], s[6:7], vcc
91; GFX9-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
92; GFX9-NEXT:    s_cbranch_execz .LBB0_6
93; GFX9-NEXT:  ; %bb.1: ; %udiv-bb1
94; GFX9-NEXT:    v_add_co_u32_e32 v24, vcc, 1, v6
95; GFX9-NEXT:    v_addc_co_u32_e32 v25, vcc, 0, v7, vcc
96; GFX9-NEXT:    v_addc_co_u32_e32 v26, vcc, 0, v8, vcc
97; GFX9-NEXT:    v_sub_u32_e32 v13, 0x7f, v6
98; GFX9-NEXT:    v_addc_co_u32_e32 v27, vcc, 0, v9, vcc
99; GFX9-NEXT:    v_sub_u32_e32 v11, 64, v13
100; GFX9-NEXT:    v_or_b32_e32 v8, v25, v27
101; GFX9-NEXT:    v_or_b32_e32 v7, v24, v26
102; GFX9-NEXT:    v_lshlrev_b64 v[9:10], v13, v[2:3]
103; GFX9-NEXT:    v_lshrrev_b64 v[11:12], v11, v[0:1]
104; GFX9-NEXT:    v_sub_u32_e32 v6, 63, v6
105; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[7:8]
106; GFX9-NEXT:    v_lshlrev_b64 v[6:7], v6, v[0:1]
107; GFX9-NEXT:    v_or_b32_e32 v8, v10, v12
108; GFX9-NEXT:    v_or_b32_e32 v9, v9, v11
109; GFX9-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v13
110; GFX9-NEXT:    v_lshlrev_b64 v[10:11], v13, v[0:1]
111; GFX9-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s[4:5]
112; GFX9-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v13
113; GFX9-NEXT:    v_cndmask_b32_e64 v6, v6, v9, s[4:5]
114; GFX9-NEXT:    v_mov_b32_e32 v8, 0
115; GFX9-NEXT:    v_mov_b32_e32 v12, 0
116; GFX9-NEXT:    v_cndmask_b32_e64 v7, v7, v3, s[6:7]
117; GFX9-NEXT:    v_cndmask_b32_e64 v6, v6, v2, s[6:7]
118; GFX9-NEXT:    v_cndmask_b32_e64 v11, 0, v11, s[4:5]
119; GFX9-NEXT:    v_mov_b32_e32 v9, 0
120; GFX9-NEXT:    v_mov_b32_e32 v13, 0
121; GFX9-NEXT:    v_cndmask_b32_e64 v10, 0, v10, s[4:5]
122; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
123; GFX9-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
124; GFX9-NEXT:    s_cbranch_execz .LBB0_5
125; GFX9-NEXT:  ; %bb.2: ; %udiv-preheader
126; GFX9-NEXT:    v_sub_u32_e32 v12, 64, v24
127; GFX9-NEXT:    v_lshrrev_b64 v[8:9], v24, v[0:1]
128; GFX9-NEXT:    v_lshlrev_b64 v[12:13], v12, v[2:3]
129; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v24
130; GFX9-NEXT:    v_or_b32_e32 v12, v8, v12
131; GFX9-NEXT:    v_subrev_u32_e32 v8, 64, v24
132; GFX9-NEXT:    v_or_b32_e32 v13, v9, v13
133; GFX9-NEXT:    v_lshrrev_b64 v[8:9], v8, v[2:3]
134; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v24
135; GFX9-NEXT:    v_cndmask_b32_e32 v9, v9, v13, vcc
136; GFX9-NEXT:    v_cndmask_b32_e64 v15, v9, v1, s[4:5]
137; GFX9-NEXT:    v_cndmask_b32_e32 v12, v8, v12, vcc
138; GFX9-NEXT:    v_lshrrev_b64 v[8:9], v24, v[2:3]
139; GFX9-NEXT:    v_cndmask_b32_e64 v14, v12, v0, s[4:5]
140; GFX9-NEXT:    v_cndmask_b32_e32 v17, 0, v9, vcc
141; GFX9-NEXT:    v_cndmask_b32_e32 v16, 0, v8, vcc
142; GFX9-NEXT:    v_add_co_u32_e32 v28, vcc, -1, v23
143; GFX9-NEXT:    v_addc_co_u32_e32 v29, vcc, -1, v22, vcc
144; GFX9-NEXT:    v_addc_co_u32_e32 v30, vcc, -1, v4, vcc
145; GFX9-NEXT:    v_mov_b32_e32 v18, 0
146; GFX9-NEXT:    v_mov_b32_e32 v12, 0
147; GFX9-NEXT:    v_addc_co_u32_e32 v31, vcc, -1, v5, vcc
148; GFX9-NEXT:    s_mov_b64 s[4:5], 0
149; GFX9-NEXT:    v_mov_b32_e32 v19, 0
150; GFX9-NEXT:    v_mov_b32_e32 v13, 0
151; GFX9-NEXT:    v_mov_b32_e32 v9, 0
152; GFX9-NEXT:  .LBB0_3: ; %udiv-do-while
153; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
154; GFX9-NEXT:    v_lshrrev_b32_e32 v32, 31, v15
155; GFX9-NEXT:    v_lshlrev_b64 v[14:15], 1, v[14:15]
156; GFX9-NEXT:    v_lshrrev_b32_e32 v33, 31, v7
157; GFX9-NEXT:    v_lshlrev_b64 v[6:7], 1, v[6:7]
158; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 31, v11
159; GFX9-NEXT:    v_lshlrev_b64 v[16:17], 1, v[16:17]
160; GFX9-NEXT:    v_or_b32_e32 v14, v14, v33
161; GFX9-NEXT:    v_or3_b32 v6, v6, v8, v12
162; GFX9-NEXT:    v_sub_co_u32_e32 v8, vcc, v28, v14
163; GFX9-NEXT:    v_or_b32_e32 v16, v16, v32
164; GFX9-NEXT:    v_subb_co_u32_e32 v8, vcc, v29, v15, vcc
165; GFX9-NEXT:    v_subb_co_u32_e32 v8, vcc, v30, v16, vcc
166; GFX9-NEXT:    v_lshlrev_b64 v[10:11], 1, v[10:11]
167; GFX9-NEXT:    v_subb_co_u32_e32 v8, vcc, v31, v17, vcc
168; GFX9-NEXT:    v_ashrrev_i32_e32 v8, 31, v8
169; GFX9-NEXT:    v_or_b32_e32 v10, v18, v10
170; GFX9-NEXT:    v_and_b32_e32 v18, v8, v23
171; GFX9-NEXT:    v_or_b32_e32 v11, v19, v11
172; GFX9-NEXT:    v_and_b32_e32 v19, v8, v22
173; GFX9-NEXT:    v_sub_co_u32_e32 v14, vcc, v14, v18
174; GFX9-NEXT:    v_and_b32_e32 v32, v8, v4
175; GFX9-NEXT:    v_subb_co_u32_e32 v15, vcc, v15, v19, vcc
176; GFX9-NEXT:    v_and_b32_e32 v33, v8, v5
177; GFX9-NEXT:    v_subb_co_u32_e32 v16, vcc, v16, v32, vcc
178; GFX9-NEXT:    v_subb_co_u32_e32 v17, vcc, v17, v33, vcc
179; GFX9-NEXT:    v_add_co_u32_e32 v24, vcc, -1, v24
180; GFX9-NEXT:    v_addc_co_u32_e32 v25, vcc, -1, v25, vcc
181; GFX9-NEXT:    v_addc_co_u32_e32 v26, vcc, -1, v26, vcc
182; GFX9-NEXT:    v_addc_co_u32_e32 v27, vcc, -1, v27, vcc
183; GFX9-NEXT:    v_or_b32_e32 v18, v24, v26
184; GFX9-NEXT:    v_or_b32_e32 v19, v25, v27
185; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[18:19]
186; GFX9-NEXT:    v_and_b32_e32 v8, 1, v8
187; GFX9-NEXT:    v_mov_b32_e32 v19, v9
188; GFX9-NEXT:    v_or3_b32 v7, v7, 0, v13
189; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
190; GFX9-NEXT:    v_mov_b32_e32 v18, v8
191; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
192; GFX9-NEXT:    s_cbranch_execnz .LBB0_3
193; GFX9-NEXT:  ; %bb.4: ; %Flow
194; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
195; GFX9-NEXT:  .LBB0_5: ; %Flow2
196; GFX9-NEXT:    s_or_b64 exec, exec, s[6:7]
197; GFX9-NEXT:    v_lshlrev_b64 v[14:15], 1, v[10:11]
198; GFX9-NEXT:    v_lshlrev_b64 v[6:7], 1, v[6:7]
199; GFX9-NEXT:    v_lshrrev_b32_e32 v11, 31, v11
200; GFX9-NEXT:    v_or3_b32 v10, v7, 0, v13
201; GFX9-NEXT:    v_or3_b32 v12, v6, v11, v12
202; GFX9-NEXT:    v_or_b32_e32 v11, v9, v15
203; GFX9-NEXT:    v_or_b32_e32 v13, v8, v14
204; GFX9-NEXT:  .LBB0_6: ; %Flow3
205; GFX9-NEXT:    s_or_b64 exec, exec, s[8:9]
206; GFX9-NEXT:    v_mul_lo_u32 v16, v13, v5
207; GFX9-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v23, v13, 0
208; GFX9-NEXT:    v_mov_b32_e32 v15, 0
209; GFX9-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v13, v4, 0
210; GFX9-NEXT:    v_mov_b32_e32 v14, v6
211; GFX9-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v22, v13, v[14:15]
212; GFX9-NEXT:    v_mul_lo_u32 v9, v11, v4
213; GFX9-NEXT:    v_mul_lo_u32 v10, v10, v23
214; GFX9-NEXT:    v_mov_b32_e32 v4, v14
215; GFX9-NEXT:    v_mov_b32_e32 v14, v15
216; GFX9-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v23, v11, v[13:14]
217; GFX9-NEXT:    v_add3_u32 v8, v8, v16, v9
218; GFX9-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v12, v23, v[7:8]
219; GFX9-NEXT:    v_mov_b32_e32 v8, v14
220; GFX9-NEXT:    v_add_co_u32_e32 v8, vcc, v4, v8
221; GFX9-NEXT:    v_addc_co_u32_e64 v9, s[4:5], 0, 0, vcc
222; GFX9-NEXT:    v_mul_lo_u32 v12, v12, v22
223; GFX9-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v22, v11, v[8:9]
224; GFX9-NEXT:    v_add3_u32 v4, v10, v7, v12
225; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v8, v6
226; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v9, v4, vcc
227; GFX9-NEXT:    v_mov_b32_e32 v7, v13
228; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v5
229; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v7, vcc
230; GFX9-NEXT:    v_subb_co_u32_e32 v2, vcc, v2, v6, vcc
231; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v4, vcc
232; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v20
233; GFX9-NEXT:    v_xor_b32_e32 v1, v1, v21
234; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v20
235; GFX9-NEXT:    v_xor_b32_e32 v2, v2, v20
236; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v21, vcc
237; GFX9-NEXT:    v_xor_b32_e32 v3, v3, v21
238; GFX9-NEXT:    v_subb_co_u32_e32 v2, vcc, v2, v20, vcc
239; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v21, vcc
240; GFX9-NEXT:    s_setpc_b64 s[30:31]
241;
242; GFX9-O0-LABEL: v_srem_i128_vv:
243; GFX9-O0:       ; %bb.0: ; %_udiv-special-cases
244; GFX9-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245; GFX9-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
246; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill
247; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
248; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
249; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v6
250; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v4
251; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v2
252; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v1
253; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
254; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
255; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
256; GFX9-O0-NEXT:    ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
257; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
258; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v1
259; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
260; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
261; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
262; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v5
263; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
264; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
265; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
266; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
267; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
268; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
269; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
270; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v3
271; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
272; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
273; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
274; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
275; GFX9-O0-NEXT:    s_mov_b32 s4, 63
276; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v11
277; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v12
278; GFX9-O0-NEXT:    v_ashrrev_i64 v[2:3], s4, v[2:3]
279; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
280; GFX9-O0-NEXT:    s_nop 0
281; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
282; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
283; GFX9-O0-NEXT:    s_nop 0
284; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
285; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v0
286; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
287; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v11
288; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v12
289; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
290; GFX9-O0-NEXT:    ; implicit-def: $vgpr30 : SGPR spill to VGPR lane
291; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 0
292; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 1
293; GFX9-O0-NEXT:    s_mov_b32 s10, s6
294; GFX9-O0-NEXT:    v_writelane_b32 v30, s10, 2
295; GFX9-O0-NEXT:    s_mov_b32 s11, s7
296; GFX9-O0-NEXT:    v_writelane_b32 v30, s11, 3
297; GFX9-O0-NEXT:    v_sub_co_u32_e32 v9, vcc, s10, v2
298; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s11
299; GFX9-O0-NEXT:    v_subb_co_u32_e32 v5, vcc, v4, v3, vcc
300; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s10
301; GFX9-O0-NEXT:    v_subb_co_u32_e32 v4, vcc, v4, v0, vcc
302; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s11
303; GFX9-O0-NEXT:    v_subb_co_u32_e32 v6, vcc, v6, v1, vcc
304; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
305; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
306; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
307; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v5
308; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
309; GFX9-O0-NEXT:    s_mov_b64 s[4:5], s[6:7]
310; GFX9-O0-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[11:12], s[4:5]
311; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v5, s[4:5]
312; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
313; GFX9-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[4:5]
314; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
315; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
316; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v2
317; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v3
318; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
319; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
320; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
321; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v6
322; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
323; GFX9-O0-NEXT:    v_cndmask_b32_e64 v1, v1, v6, s[4:5]
324; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
325; GFX9-O0-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
326; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
327; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
328; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v0
329; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v1
330; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v7
331; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v8
332; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v17
333; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v18
334; GFX9-O0-NEXT:    v_sub_co_u32_e32 v13, vcc, s10, v6
335; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s11
336; GFX9-O0-NEXT:    v_subb_co_u32_e32 v11, vcc, v10, v9, vcc
337; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s10
338; GFX9-O0-NEXT:    v_subb_co_u32_e32 v10, vcc, v10, v7, vcc
339; GFX9-O0-NEXT:    v_mov_b32_e32 v12, s11
340; GFX9-O0-NEXT:    v_subb_co_u32_e32 v12, vcc, v12, v8, vcc
341; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
342; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
343; GFX9-O0-NEXT:    ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
344; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v11
345; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v14
346; GFX9-O0-NEXT:    s_mov_b64 s[4:5], s[6:7]
347; GFX9-O0-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[17:18], s[4:5]
348; GFX9-O0-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s[4:5]
349; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v13
350; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v11, s[4:5]
351; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
352; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
353; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v6
354; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v9
355; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
356; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
357; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
358; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v12
359; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v11
360; GFX9-O0-NEXT:    v_cndmask_b32_e64 v8, v8, v12, s[4:5]
361; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 killed $vgpr10_vgpr11 killed $exec
362; GFX9-O0-NEXT:    v_cndmask_b32_e64 v7, v7, v10, s[4:5]
363; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
364; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
365; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v7
366; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v8
367; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v5
368; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v4
369; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
370; GFX9-O0-NEXT:    s_nop 0
371; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
372; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v15
373; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v16
374; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
375; GFX9-O0-NEXT:    s_nop 0
376; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
377; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v11
378; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v10
379; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
380; GFX9-O0-NEXT:    s_nop 0
381; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
382; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v17
383; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v18
384; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
385; GFX9-O0-NEXT:    s_nop 0
386; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
387; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v11
388; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v10
389; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
390; GFX9-O0-NEXT:    s_nop 0
391; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
392; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v17
393; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v18
394; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
395; GFX9-O0-NEXT:    s_nop 0
396; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
397; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v5
398; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v4
399; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
400; GFX9-O0-NEXT:    s_nop 0
401; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
402; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v15
403; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v16
404; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
405; GFX9-O0-NEXT:    s_nop 0
406; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
407; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v11
408; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v18
409; GFX9-O0-NEXT:    v_or_b32_e64 v14, v12, v13
410; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v10
411; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v17
412; GFX9-O0-NEXT:    v_or_b32_e64 v12, v12, v13
413; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
414; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v14
415; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[12:13], s[6:7]
416; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v5
417; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v16
418; GFX9-O0-NEXT:    v_or_b32_e64 v14, v12, v13
419; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v4
420; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v15
421; GFX9-O0-NEXT:    v_or_b32_e64 v12, v12, v13
422; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
423; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v14
424; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[12:13], s[6:7]
425; GFX9-O0-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
426; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
427; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[8:9], v[10:11], s[8:9]
428; GFX9-O0-NEXT:    v_ffbh_u32_e64 v7, v7
429; GFX9-O0-NEXT:    s_mov_b32 s13, 32
430; GFX9-O0-NEXT:    v_add_u32_e64 v7, v7, s13
431; GFX9-O0-NEXT:    v_ffbh_u32_e64 v8, v8
432; GFX9-O0-NEXT:    v_min_u32_e64 v7, v7, v8
433; GFX9-O0-NEXT:    s_mov_b32 s12, 0
434; GFX9-O0-NEXT:    ; implicit-def: $sgpr14
435; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s12
436; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
437; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v10
438; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v8
439; GFX9-O0-NEXT:    v_ffbh_u32_e64 v6, v6
440; GFX9-O0-NEXT:    v_add_u32_e64 v6, v6, s13
441; GFX9-O0-NEXT:    v_ffbh_u32_e64 v9, v9
442; GFX9-O0-NEXT:    v_min_u32_e64 v12, v6, v9
443; GFX9-O0-NEXT:    ; implicit-def: $sgpr14
444; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s12
445; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
446; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v6
447; GFX9-O0-NEXT:    s_mov_b64 s[14:15], 64
448; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v12
449; GFX9-O0-NEXT:    s_mov_b32 s16, s14
450; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v13
451; GFX9-O0-NEXT:    s_mov_b32 s18, s15
452; GFX9-O0-NEXT:    v_add_co_u32_e64 v9, s[16:17], v9, s16
453; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s18
454; GFX9-O0-NEXT:    v_addc_co_u32_e64 v6, s[16:17], v6, v10, s[16:17]
455; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
456; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v6
457; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v10
458; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v11, s[8:9]
459; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v7
460; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v9
461; GFX9-O0-NEXT:    v_cndmask_b32_e64 v8, v7, v8, s[8:9]
462; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
463; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
464; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
465; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v6
466; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
467; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[8:9], v[4:5], s[8:9]
468; GFX9-O0-NEXT:    v_ffbh_u32_e64 v4, v0
469; GFX9-O0-NEXT:    v_add_u32_e64 v4, v4, s13
470; GFX9-O0-NEXT:    v_ffbh_u32_e64 v5, v1
471; GFX9-O0-NEXT:    v_min_u32_e64 v5, v4, v5
472; GFX9-O0-NEXT:    ; implicit-def: $sgpr16
473; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s12
474; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
475; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
476; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
477; GFX9-O0-NEXT:    v_ffbh_u32_e64 v4, v2
478; GFX9-O0-NEXT:    v_add_u32_e64 v4, v4, s13
479; GFX9-O0-NEXT:    v_ffbh_u32_e64 v10, v3
480; GFX9-O0-NEXT:    v_min_u32_e64 v11, v4, v10
481; GFX9-O0-NEXT:    ; implicit-def: $sgpr13
482; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s12
483; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
484; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v4
485; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v11
486; GFX9-O0-NEXT:    s_mov_b32 s12, s14
487; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v12
488; GFX9-O0-NEXT:    s_mov_b32 s14, s15
489; GFX9-O0-NEXT:    v_add_co_u32_e64 v10, s[12:13], v10, s12
490; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s14
491; GFX9-O0-NEXT:    v_addc_co_u32_e64 v4, s[12:13], v4, v11, s[12:13]
492; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
493; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v4
494; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v11
495; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[8:9]
496; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
497; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
498; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[8:9]
499; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
500; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
501; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
502; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
503; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
504; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
505; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec
506; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
507; GFX9-O0-NEXT:    v_sub_co_u32_e32 v4, vcc, v4, v7
508; GFX9-O0-NEXT:    v_subb_co_u32_e32 v8, vcc, v5, v6, vcc
509; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s10
510; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s10
511; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v5, v6, vcc
512; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s11
513; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s11
514; GFX9-O0-NEXT:    v_subb_co_u32_e32 v6, vcc, v5, v6, vcc
515; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
516; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
517; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
518; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v8
519; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
520; GFX9-O0-NEXT:    s_nop 0
521; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
522; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
523; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
524; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
525; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v6
526; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
527; GFX9-O0-NEXT:    s_nop 0
528; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
529; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
530; GFX9-O0-NEXT:    s_mov_b64 s[12:13], 0x7f
531; GFX9-O0-NEXT:    v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
532; GFX9-O0-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[14:15]
533; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
534; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[14:15]
535; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v9, s[8:9]
536; GFX9-O0-NEXT:    v_and_b32_e64 v6, 1, v6
537; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[8:9], v6, 1
538; GFX9-O0-NEXT:    s_or_b64 s[8:9], s[4:5], s[8:9]
539; GFX9-O0-NEXT:    s_mov_b64 s[4:5], -1
540; GFX9-O0-NEXT:    s_xor_b64 s[4:5], s[8:9], s[4:5]
541; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
542; GFX9-O0-NEXT:    s_mov_b32 s14, s13
543; GFX9-O0-NEXT:    v_xor_b32_e64 v6, v6, s14
544; GFX9-O0-NEXT:    ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
545; GFX9-O0-NEXT:    v_xor_b32_e64 v4, v4, s12
546; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
547; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v6
548; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
549; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v8
550; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v9
551; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
552; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
553; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
554; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
555; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v6
556; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7]
557; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s11
558; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v1, v4, s[8:9]
559; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s10
560; GFX9-O0-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s[8:9]
561; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
562; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
563; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
564; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v4
565; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s11
566; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v3, v4, s[8:9]
567; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s10
568; GFX9-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[8:9]
569; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
570; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
571; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
572; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v4
573; GFX9-O0-NEXT:    s_and_b64 s[6:7], s[4:5], s[6:7]
574; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
575; GFX9-O0-NEXT:    s_nop 0
576; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
577; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
578; GFX9-O0-NEXT:    s_nop 0
579; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
580; GFX9-O0-NEXT:    s_mov_b64 s[4:5], exec
581; GFX9-O0-NEXT:    v_writelane_b32 v30, s4, 4
582; GFX9-O0-NEXT:    v_writelane_b32 v30, s5, 5
583; GFX9-O0-NEXT:    s_or_saveexec_b64 s[22:23], -1
584; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
585; GFX9-O0-NEXT:    s_mov_b64 exec, s[22:23]
586; GFX9-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
587; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
588; GFX9-O0-NEXT:    s_cbranch_execz .LBB0_3
589; GFX9-O0-NEXT:    s_branch .LBB0_8
590; GFX9-O0-NEXT:  .LBB0_1: ; %Flow
591; GFX9-O0-NEXT:    s_or_saveexec_b64 s[22:23], -1
592; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
593; GFX9-O0-NEXT:    s_mov_b64 exec, s[22:23]
594; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
595; GFX9-O0-NEXT:    v_readlane_b32 s4, v30, 6
596; GFX9-O0-NEXT:    v_readlane_b32 s5, v30, 7
597; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
598; GFX9-O0-NEXT:  ; %bb.2: ; %Flow
599; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
600; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
601; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
602; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
603; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
604; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
605; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
606; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
607; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
608; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
609; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
610; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
611; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
612; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
613; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
614; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
615; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
616; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
617; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
618; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
619; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
620; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
621; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
622; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
623; GFX9-O0-NEXT:    s_branch .LBB0_5
624; GFX9-O0-NEXT:  .LBB0_3: ; %Flow2
625; GFX9-O0-NEXT:    s_or_saveexec_b64 s[22:23], -1
626; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
627; GFX9-O0-NEXT:    s_mov_b64 exec, s[22:23]
628; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
629; GFX9-O0-NEXT:    v_readlane_b32 s4, v30, 4
630; GFX9-O0-NEXT:    v_readlane_b32 s5, v30, 5
631; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
632; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
633; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
634; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
635; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
636; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
637; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
638; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
639; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
640; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
641; GFX9-O0-NEXT:    s_nop 0
642; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
643; GFX9-O0-NEXT:    s_branch .LBB0_9
644; GFX9-O0-NEXT:  .LBB0_4: ; %udiv-loop-exit
645; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
646; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
647; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
648; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
649; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
650; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
651; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
652; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
653; GFX9-O0-NEXT:    s_mov_b32 s4, 1
654; GFX9-O0-NEXT:    s_waitcnt vmcnt(2)
655; GFX9-O0-NEXT:    v_lshlrev_b64 v[2:3], s4, v[0:1]
656; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
657; GFX9-O0-NEXT:    v_lshlrev_b64 v[9:10], s4, v[9:10]
658; GFX9-O0-NEXT:    s_mov_b32 s4, 63
659; GFX9-O0-NEXT:    v_lshrrev_b64 v[0:1], s4, v[0:1]
660; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v1
661; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v10
662; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v8
663; GFX9-O0-NEXT:    v_or3_b32 v4, v4, v11, v12
664; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
665; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v9
666; GFX9-O0-NEXT:    v_or3_b32 v0, v0, v1, v7
667; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
668; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v4
669; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v3
670; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v6
671; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v7
672; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
673; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
674; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
675; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
676; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v4
677; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
678; GFX9-O0-NEXT:    s_nop 0
679; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
680; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
681; GFX9-O0-NEXT:    s_nop 0
682; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
683; GFX9-O0-NEXT:    s_branch .LBB0_3
684; GFX9-O0-NEXT:  .LBB0_5: ; %Flow1
685; GFX9-O0-NEXT:    s_or_saveexec_b64 s[22:23], -1
686; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
687; GFX9-O0-NEXT:    s_mov_b64 exec, s[22:23]
688; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
689; GFX9-O0-NEXT:    v_readlane_b32 s4, v30, 8
690; GFX9-O0-NEXT:    v_readlane_b32 s5, v30, 9
691; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
692; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
693; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
694; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
695; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
696; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
697; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
698; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
699; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
700; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
701; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
702; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
703; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
704; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
705; GFX9-O0-NEXT:    s_nop 0
706; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
707; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
708; GFX9-O0-NEXT:    s_nop 0
709; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
710; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
711; GFX9-O0-NEXT:    s_nop 0
712; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
713; GFX9-O0-NEXT:    s_branch .LBB0_4
714; GFX9-O0-NEXT:  .LBB0_6: ; %udiv-do-while
715; GFX9-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
716; GFX9-O0-NEXT:    s_or_saveexec_b64 s[22:23], -1
717; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
718; GFX9-O0-NEXT:    s_mov_b64 exec, s[22:23]
719; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
720; GFX9-O0-NEXT:    v_readlane_b32 s6, v30, 10
721; GFX9-O0-NEXT:    v_readlane_b32 s7, v30, 11
722; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
723; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
724; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
725; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
726; GFX9-O0-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
727; GFX9-O0-NEXT:    buffer_load_dword v23, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
728; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
729; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
730; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
731; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
732; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
733; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
734; GFX9-O0-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
735; GFX9-O0-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
736; GFX9-O0-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
737; GFX9-O0-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
738; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
739; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
740; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
741; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
742; GFX9-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
743; GFX9-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
744; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
745; GFX9-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
746; GFX9-O0-NEXT:    s_mov_b32 s4, 63
747; GFX9-O0-NEXT:    s_waitcnt vmcnt(16)
748; GFX9-O0-NEXT:    v_lshrrev_b64 v[28:29], s4, v[2:3]
749; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v29
750; GFX9-O0-NEXT:    s_mov_b32 s5, 1
751; GFX9-O0-NEXT:    v_lshlrev_b64 v[22:23], s5, v[22:23]
752; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v23
753; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
754; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v28
755; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v22
756; GFX9-O0-NEXT:    v_or_b32_e64 v22, v5, v10
757; GFX9-O0-NEXT:    ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
758; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v4
759; GFX9-O0-NEXT:    v_lshlrev_b64 v[28:29], s5, v[2:3]
760; GFX9-O0-NEXT:    v_lshrrev_b64 v[4:5], s4, v[6:7]
761; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v29
762; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v5
763; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
764; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v28
765; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
766; GFX9-O0-NEXT:    v_or_b32_e64 v4, v3, v4
767; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
768; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v2
769; GFX9-O0-NEXT:    v_lshlrev_b64 v[2:3], s5, v[0:1]
770; GFX9-O0-NEXT:    v_lshlrev_b64 v[28:29], s5, v[6:7]
771; GFX9-O0-NEXT:    v_lshrrev_b64 v[0:1], s4, v[0:1]
772; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v1
773; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v29
774; GFX9-O0-NEXT:    s_waitcnt vmcnt(10)
775; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v27
776; GFX9-O0-NEXT:    v_or3_b32 v6, v6, v7, v10
777; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
778; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v28
779; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v26
780; GFX9-O0-NEXT:    v_or3_b32 v0, v0, v1, v7
781; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
782; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v6
783; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v3
784; GFX9-O0-NEXT:    s_waitcnt vmcnt(8)
785; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v25
786; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v7
787; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
788; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v24
789; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
790; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
791; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v6
792; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
793; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v5
794; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v22
795; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v23
796; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
797; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v11
798; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v14
799; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v15
800; GFX9-O0-NEXT:    v_sub_co_u32_e32 v13, vcc, v13, v6
801; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
802; GFX9-O0-NEXT:    v_subb_co_u32_e32 v12, vcc, v12, v10, vcc
803; GFX9-O0-NEXT:    v_subb_co_u32_e32 v11, vcc, v11, v4, vcc
804; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v7, v5, vcc
805; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
806; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
807; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
808; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v7
809; GFX9-O0-NEXT:    v_ashrrev_i64 v[13:14], s4, v[11:12]
810; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v14
811; GFX9-O0-NEXT:    s_mov_b64 s[4:5], 1
812; GFX9-O0-NEXT:    s_mov_b32 s8, s5
813; GFX9-O0-NEXT:    v_and_b32_e64 v12, v7, s8
814; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v13
815; GFX9-O0-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
816; GFX9-O0-NEXT:    v_and_b32_e64 v14, v11, s4
817; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
818; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v12
819; GFX9-O0-NEXT:    v_mov_b32_e32 v12, 0
820; GFX9-O0-NEXT:    v_mov_b32_e32 v13, 0
821; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v21
822; GFX9-O0-NEXT:    v_and_b32_e64 v22, v7, v22
823; GFX9-O0-NEXT:    v_and_b32_e64 v20, v11, v20
824; GFX9-O0-NEXT:    ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
825; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v22
826; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v19
827; GFX9-O0-NEXT:    v_and_b32_e64 v7, v7, v22
828; GFX9-O0-NEXT:    v_and_b32_e64 v22, v11, v18
829; GFX9-O0-NEXT:    ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
830; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v7
831; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v22
832; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v23
833; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v20
834; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v21
835; GFX9-O0-NEXT:    v_sub_co_u32_e32 v6, vcc, v6, v19
836; GFX9-O0-NEXT:    v_subb_co_u32_e32 v10, vcc, v10, v18, vcc
837; GFX9-O0-NEXT:    v_subb_co_u32_e32 v4, vcc, v4, v11, vcc
838; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v5, v7, vcc
839; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
840; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
841; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
842; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
843; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
844; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
845; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
846; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v10
847; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v8
848; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 killed $vgpr8_vgpr9 killed $exec
849; GFX9-O0-NEXT:    s_mov_b64 s[8:9], -1
850; GFX9-O0-NEXT:    s_mov_b32 s5, s8
851; GFX9-O0-NEXT:    s_mov_b32 s4, s9
852; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v16
853; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v17
854; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s5
855; GFX9-O0-NEXT:    v_add_co_u32_e32 v19, vcc, v11, v16
856; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s4
857; GFX9-O0-NEXT:    v_addc_co_u32_e32 v9, vcc, v9, v11, vcc
858; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s5
859; GFX9-O0-NEXT:    v_addc_co_u32_e32 v16, vcc, v10, v11, vcc
860; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s4
861; GFX9-O0-NEXT:    v_addc_co_u32_e32 v8, vcc, v8, v10, vcc
862; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
863; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
864; GFX9-O0-NEXT:    ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
865; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v9
866; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
867; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
868; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
869; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v8
870; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v16
871; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v17
872; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v19
873; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v20
874; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v17
875; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v20
876; GFX9-O0-NEXT:    v_or_b32_e64 v18, v18, v21
877; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v16
878; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v19
879; GFX9-O0-NEXT:    v_or_b32_e64 v16, v16, v17
880; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
881; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v18
882; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[16:17], v[12:13]
883; GFX9-O0-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
884; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v3
885; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v2
886; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
887; GFX9-O0-NEXT:    s_nop 0
888; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
889; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v1
890; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v0
891; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
892; GFX9-O0-NEXT:    s_nop 0
893; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
894; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v15
895; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v14
896; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
897; GFX9-O0-NEXT:    s_nop 0
898; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
899; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v13
900; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v12
901; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
902; GFX9-O0-NEXT:    s_nop 0
903; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
904; GFX9-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
905; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 6
906; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 7
907; GFX9-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
908; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 10
909; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 11
910; GFX9-O0-NEXT:    s_or_saveexec_b64 s[22:23], -1
911; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
912; GFX9-O0-NEXT:    s_mov_b64 exec, s[22:23]
913; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
914; GFX9-O0-NEXT:    s_nop 0
915; GFX9-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
916; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
917; GFX9-O0-NEXT:    s_nop 0
918; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
919; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
920; GFX9-O0-NEXT:    s_nop 0
921; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
922; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
923; GFX9-O0-NEXT:    s_nop 0
924; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
925; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
926; GFX9-O0-NEXT:    s_nop 0
927; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
928; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
929; GFX9-O0-NEXT:    s_nop 0
930; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
931; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
932; GFX9-O0-NEXT:    s_nop 0
933; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
934; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
935; GFX9-O0-NEXT:    s_nop 0
936; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
937; GFX9-O0-NEXT:    s_andn2_b64 exec, exec, s[4:5]
938; GFX9-O0-NEXT:    s_cbranch_execnz .LBB0_6
939; GFX9-O0-NEXT:    s_branch .LBB0_1
940; GFX9-O0-NEXT:  .LBB0_7: ; %udiv-preheader
941; GFX9-O0-NEXT:    s_or_saveexec_b64 s[22:23], -1
942; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
943; GFX9-O0-NEXT:    s_mov_b64 exec, s[22:23]
944; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
945; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
946; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
947; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
948; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
949; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload
950; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload
951; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload
952; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
953; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
954; GFX9-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
955; GFX9-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
956; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
957; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
958; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
959; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
960; GFX9-O0-NEXT:    s_waitcnt vmcnt(9)
961; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v10
962; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
963; GFX9-O0-NEXT:    v_lshrrev_b64 v[6:7], v4, v[20:21]
964; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
965; GFX9-O0-NEXT:    s_mov_b32 s6, 64
966; GFX9-O0-NEXT:    v_sub_u32_e64 v12, s6, v4
967; GFX9-O0-NEXT:    v_lshlrev_b64 v[22:23], v12, v[18:19]
968; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v23
969; GFX9-O0-NEXT:    v_or_b32_e64 v5, v5, v12
970; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
971; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v22
972; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v7
973; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
974; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
975; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v7
976; GFX9-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v4, s6
977; GFX9-O0-NEXT:    v_sub_u32_e64 v5, v4, s6
978; GFX9-O0-NEXT:    v_lshrrev_b64 v[22:23], v5, v[18:19]
979; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v23
980; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v12, s[4:5]
981; GFX9-O0-NEXT:    s_mov_b32 s6, 0
982; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v4, s6
983; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v21
984; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v12, s[6:7]
985; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
986; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v22
987; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[4:5]
988; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v20
989; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[6:7]
990; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
991; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
992; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
993; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
994; GFX9-O0-NEXT:    v_lshrrev_b64 v[4:5], v4, v[18:19]
995; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v5
996; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
997; GFX9-O0-NEXT:    s_mov_b32 s8, s7
998; GFX9-O0-NEXT:    v_mov_b32_e32 v12, s8
999; GFX9-O0-NEXT:    v_cndmask_b32_e64 v12, v12, v15, s[4:5]
1000; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v4
1001; GFX9-O0-NEXT:    s_mov_b32 s8, s6
1002; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
1003; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[4:5]
1004; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1005; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1006; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
1007; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v12
1008; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v13
1009; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v14
1010; GFX9-O0-NEXT:    s_mov_b64 s[8:9], -1
1011; GFX9-O0-NEXT:    s_mov_b32 s5, s8
1012; GFX9-O0-NEXT:    s_mov_b32 s4, s9
1013; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v16
1014; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v17
1015; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s5
1016; GFX9-O0-NEXT:    v_add_co_u32_e32 v12, vcc, v12, v16
1017; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s4
1018; GFX9-O0-NEXT:    v_addc_co_u32_e32 v16, vcc, v15, v16, vcc
1019; GFX9-O0-NEXT:    v_mov_b32_e32 v15, s5
1020; GFX9-O0-NEXT:    v_addc_co_u32_e32 v14, vcc, v14, v15, vcc
1021; GFX9-O0-NEXT:    v_mov_b32_e32 v15, s4
1022; GFX9-O0-NEXT:    v_addc_co_u32_e32 v13, vcc, v13, v15, vcc
1023; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1024; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1025; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
1026; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v13
1027; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1028; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1029; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
1030; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v16
1031; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
1032; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
1033; GFX9-O0-NEXT:    s_nop 0
1034; GFX9-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
1035; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
1036; GFX9-O0-NEXT:    s_nop 0
1037; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
1038; GFX9-O0-NEXT:    s_mov_b64 s[4:5], s[6:7]
1039; GFX9-O0-NEXT:    v_mov_b32_e32 v15, s9
1040; GFX9-O0-NEXT:    v_mov_b32_e32 v14, s8
1041; GFX9-O0-NEXT:    v_mov_b32_e32 v13, s7
1042; GFX9-O0-NEXT:    v_mov_b32_e32 v12, s6
1043; GFX9-O0-NEXT:    v_writelane_b32 v30, s4, 10
1044; GFX9-O0-NEXT:    v_writelane_b32 v30, s5, 11
1045; GFX9-O0-NEXT:    s_or_saveexec_b64 s[22:23], -1
1046; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
1047; GFX9-O0-NEXT:    s_mov_b64 exec, s[22:23]
1048; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
1049; GFX9-O0-NEXT:    s_nop 0
1050; GFX9-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
1051; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
1052; GFX9-O0-NEXT:    s_nop 0
1053; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
1054; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
1055; GFX9-O0-NEXT:    s_nop 0
1056; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
1057; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
1058; GFX9-O0-NEXT:    s_nop 0
1059; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
1060; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
1061; GFX9-O0-NEXT:    s_nop 0
1062; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
1063; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
1064; GFX9-O0-NEXT:    s_nop 0
1065; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
1066; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
1067; GFX9-O0-NEXT:    s_nop 0
1068; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
1069; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
1070; GFX9-O0-NEXT:    s_nop 0
1071; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
1072; GFX9-O0-NEXT:    s_branch .LBB0_6
1073; GFX9-O0-NEXT:  .LBB0_8: ; %udiv-bb1
1074; GFX9-O0-NEXT:    s_or_saveexec_b64 s[22:23], -1
1075; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
1076; GFX9-O0-NEXT:    s_mov_b64 exec, s[22:23]
1077; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
1078; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1079; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1080; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1081; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1082; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1083; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1084; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1085; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 1
1086; GFX9-O0-NEXT:    s_mov_b32 s5, s6
1087; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
1088; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v0
1089; GFX9-O0-NEXT:    s_mov_b32 s4, s7
1090; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
1091; GFX9-O0-NEXT:    s_mov_b32 s8, s6
1092; GFX9-O0-NEXT:    s_mov_b32 s9, s7
1093; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v4
1094; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
1095; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s5
1096; GFX9-O0-NEXT:    v_add_co_u32_e32 v8, vcc, v3, v4
1097; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s4
1098; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
1099; GFX9-O0-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v4, vcc
1100; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
1101; GFX9-O0-NEXT:    v_addc_co_u32_e32 v0, vcc, v0, v4, vcc
1102; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s9
1103; GFX9-O0-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v4, vcc
1104; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1105; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1106; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
1107; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v1
1108; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1109; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1110; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
1111; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
1112; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v1
1113; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v0
1114; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
1115; GFX9-O0-NEXT:    s_nop 0
1116; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill
1117; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
1118; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
1119; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill
1120; GFX9-O0-NEXT:    s_nop 0
1121; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill
1122; GFX9-O0-NEXT:    s_mov_b32 s4, 0x7f
1123; GFX9-O0-NEXT:    v_sub_u32_e64 v2, s4, v3
1124; GFX9-O0-NEXT:    v_lshlrev_b64 v[4:5], v2, v[10:11]
1125; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v5
1126; GFX9-O0-NEXT:    s_mov_b32 s4, 64
1127; GFX9-O0-NEXT:    v_sub_u32_e64 v13, s4, v2
1128; GFX9-O0-NEXT:    v_lshrrev_b64 v[13:14], v13, v[6:7]
1129; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v14
1130; GFX9-O0-NEXT:    v_or_b32_e64 v12, v12, v15
1131; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
1132; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v13
1133; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
1134; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
1135; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v12
1136; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v5
1137; GFX9-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v2, s4
1138; GFX9-O0-NEXT:    s_mov_b32 s10, 63
1139; GFX9-O0-NEXT:    v_sub_u32_e64 v3, s10, v3
1140; GFX9-O0-NEXT:    v_lshlrev_b64 v[12:13], v3, v[6:7]
1141; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v13
1142; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v14, s[4:5]
1143; GFX9-O0-NEXT:    s_mov_b32 s10, 0
1144; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[10:11], v2, s10
1145; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v11
1146; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v14, s[10:11]
1147; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v4
1148; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v12
1149; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[4:5]
1150; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
1151; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[10:11]
1152; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
1153; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
1154; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
1155; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v3
1156; GFX9-O0-NEXT:    v_lshlrev_b64 v[6:7], v2, v[6:7]
1157; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v7
1158; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s9
1159; GFX9-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[4:5]
1160; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
1161; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s8
1162; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v3, v6, s[4:5]
1163; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1164; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1165; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
1166; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v2
1167; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
1168; GFX9-O0-NEXT:    s_nop 0
1169; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
1170; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
1171; GFX9-O0-NEXT:    s_nop 0
1172; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
1173; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
1174; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v9
1175; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
1176; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
1177; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v8
1178; GFX9-O0-NEXT:    v_or_b32_e64 v0, v0, v1
1179; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
1180; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
1181; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[4:5], v[0:1], s[6:7]
1182; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
1183; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s8
1184; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s9
1185; GFX9-O0-NEXT:    v_mov_b32_e32 v0, s6
1186; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s7
1187; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
1188; GFX9-O0-NEXT:    s_nop 0
1189; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
1190; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
1191; GFX9-O0-NEXT:    s_nop 0
1192; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
1193; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
1194; GFX9-O0-NEXT:    s_nop 0
1195; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
1196; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
1197; GFX9-O0-NEXT:    s_nop 0
1198; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
1199; GFX9-O0-NEXT:    s_mov_b64 s[6:7], exec
1200; GFX9-O0-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
1201; GFX9-O0-NEXT:    s_xor_b64 s[6:7], s[4:5], s[6:7]
1202; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 8
1203; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 9
1204; GFX9-O0-NEXT:    s_or_saveexec_b64 s[22:23], -1
1205; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
1206; GFX9-O0-NEXT:    s_mov_b64 exec, s[22:23]
1207; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
1208; GFX9-O0-NEXT:    s_cbranch_execz .LBB0_5
1209; GFX9-O0-NEXT:    s_branch .LBB0_7
1210; GFX9-O0-NEXT:  .LBB0_9: ; %udiv-end
1211; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
1212; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
1213; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
1214; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
1215; GFX9-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
1216; GFX9-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
1217; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
1218; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
1219; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
1220; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
1221; GFX9-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
1222; GFX9-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
1223; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
1224; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
1225; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
1226; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
1227; GFX9-O0-NEXT:    s_mov_b32 s4, 32
1228; GFX9-O0-NEXT:    s_waitcnt vmcnt(2)
1229; GFX9-O0-NEXT:    v_lshrrev_b64 v[0:1], s4, v[16:17]
1230; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
1231; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
1232; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v20
1233; GFX9-O0-NEXT:    v_mul_lo_u32 v8, v1, v0
1234; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
1235; GFX9-O0-NEXT:    v_lshrrev_b64 v[20:21], s4, v[20:21]
1236; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v20
1237; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v16
1238; GFX9-O0-NEXT:    v_mul_lo_u32 v5, v2, v5
1239; GFX9-O0-NEXT:    v_mad_u64_u32 v[16:17], s[6:7], v2, v0, 0
1240; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v17
1241; GFX9-O0-NEXT:    v_add3_u32 v8, v0, v5, v8
1242; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
1243; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
1244; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
1245; GFX9-O0-NEXT:    v_mov_b32_e32 v0, s5
1246; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
1247; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v0
1248; GFX9-O0-NEXT:    v_lshlrev_b64 v[8:9], s4, v[8:9]
1249; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
1250; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 killed $vgpr16_vgpr17 killed $exec
1251; GFX9-O0-NEXT:    s_mov_b32 s5, 0
1252; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
1253; GFX9-O0-NEXT:    v_mov_b32_e32 v0, s5
1254; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
1255; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v0
1256; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v17
1257; GFX9-O0-NEXT:    v_or_b32_e64 v0, v0, v5
1258; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 killed $vgpr8_vgpr9 killed $exec
1259; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v16
1260; GFX9-O0-NEXT:    v_or_b32_e64 v16, v5, v8
1261; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
1262; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v0
1263; GFX9-O0-NEXT:    v_lshrrev_b64 v[8:9], s4, v[18:19]
1264; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v8
1265; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v14
1266; GFX9-O0-NEXT:    v_mul_lo_u32 v9, v8, v5
1267; GFX9-O0-NEXT:    v_lshrrev_b64 v[14:15], s4, v[14:15]
1268; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec
1269; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v18
1270; GFX9-O0-NEXT:    v_mul_lo_u32 v14, v14, v0
1271; GFX9-O0-NEXT:    v_mad_u64_u32 v[18:19], s[6:7], v8, v0, 0
1272; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v19
1273; GFX9-O0-NEXT:    v_add3_u32 v8, v8, v9, v14
1274; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
1275; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
1276; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
1277; GFX9-O0-NEXT:    v_mov_b32_e32 v14, s6
1278; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
1279; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v14
1280; GFX9-O0-NEXT:    v_lshlrev_b64 v[8:9], s4, v[8:9]
1281; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v9
1282; GFX9-O0-NEXT:    ; kill: def $vgpr18 killed $vgpr18 killed $vgpr18_vgpr19 killed $exec
1283; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
1284; GFX9-O0-NEXT:    v_mov_b32_e32 v14, s5
1285; GFX9-O0-NEXT:    ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
1286; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v14
1287; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v19
1288; GFX9-O0-NEXT:    v_or_b32_e64 v14, v14, v15
1289; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v8
1290; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v18
1291; GFX9-O0-NEXT:    v_or_b32_e64 v8, v8, v9
1292; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
1293; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v14
1294; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v8
1295; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v16
1296; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v9
1297; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v17
1298; GFX9-O0-NEXT:    v_add_co_u32_e64 v16, s[6:7], v14, v15
1299; GFX9-O0-NEXT:    v_addc_co_u32_e64 v8, s[6:7], v8, v9, s[6:7]
1300; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
1301; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v8
1302; GFX9-O0-NEXT:    v_mad_u64_u32 v[14:15], s[6:7], v5, v1, 0
1303; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v14
1304; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
1305; GFX9-O0-NEXT:    v_mov_b32_e32 v8, s5
1306; GFX9-O0-NEXT:    ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
1307; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v8
1308; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v19
1309; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v15
1310; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
1311; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
1312; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
1313; GFX9-O0-NEXT:    v_mov_b32_e32 v9, s6
1314; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
1315; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v9
1316; GFX9-O0-NEXT:    v_lshlrev_b64 v[14:15], s4, v[14:15]
1317; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v15
1318; GFX9-O0-NEXT:    v_or_b32_e64 v8, v8, v9
1319; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v18
1320; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec
1321; GFX9-O0-NEXT:    v_or_b32_e64 v20, v9, v14
1322; GFX9-O0-NEXT:    ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
1323; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v8
1324; GFX9-O0-NEXT:    v_mad_u64_u32 v[14:15], s[6:7], v5, v2, 0
1325; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v14
1326; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
1327; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s5
1328; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
1329; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v5
1330; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
1331; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v15
1332; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
1333; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
1334; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
1335; GFX9-O0-NEXT:    v_mov_b32_e32 v18, s6
1336; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
1337; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v18
1338; GFX9-O0-NEXT:    v_lshlrev_b64 v[14:15], s4, v[14:15]
1339; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v15
1340; GFX9-O0-NEXT:    v_or_b32_e64 v5, v5, v18
1341; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 killed $vgpr8_vgpr9 killed $exec
1342; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v14
1343; GFX9-O0-NEXT:    v_or_b32_e64 v22, v8, v9
1344; GFX9-O0-NEXT:    ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
1345; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v5
1346; GFX9-O0-NEXT:    v_mad_u64_u32 v[14:15], s[6:7], v0, v2, 0
1347; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v15
1348; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
1349; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s5
1350; GFX9-O0-NEXT:    ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
1351; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v2
1352; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v22
1353; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v18
1354; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v23
1355; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v19
1356; GFX9-O0-NEXT:    v_add_co_u32_e64 v8, s[6:7], v8, v9
1357; GFX9-O0-NEXT:    v_addc_co_u32_e64 v2, s[6:7], v2, v5, s[6:7]
1358; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
1359; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v2
1360; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v9
1361; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0xffffffff
1362; GFX9-O0-NEXT:    s_mov_b32 s8, s7
1363; GFX9-O0-NEXT:    v_and_b32_e64 v2, v2, s8
1364; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v8
1365; GFX9-O0-NEXT:    ; kill: def $sgpr6 killed $sgpr6 killed $sgpr6_sgpr7
1366; GFX9-O0-NEXT:    v_and_b32_e64 v18, v5, s6
1367; GFX9-O0-NEXT:    ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
1368; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v2
1369; GFX9-O0-NEXT:    v_mad_u64_u32 v[22:23], s[6:7], v0, v1, 0
1370; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v22
1371; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
1372; GFX9-O0-NEXT:    v_mov_b32_e32 v0, s5
1373; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
1374; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v0
1375; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v2
1376; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v23
1377; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
1378; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
1379; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
1380; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s6
1381; GFX9-O0-NEXT:    ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
1382; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v5
1383; GFX9-O0-NEXT:    v_lshlrev_b64 v[22:23], s4, v[22:23]
1384; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v23
1385; GFX9-O0-NEXT:    v_or_b32_e64 v0, v0, v5
1386; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 killed $vgpr1_vgpr2 killed $exec
1387; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v22
1388; GFX9-O0-NEXT:    v_or_b32_e64 v1, v1, v2
1389; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
1390; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v0
1391; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v1
1392; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v18
1393; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
1394; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v19
1395; GFX9-O0-NEXT:    v_add_co_u32_e64 v0, s[6:7], v0, v5
1396; GFX9-O0-NEXT:    v_addc_co_u32_e64 v2, s[6:7], v1, v2, s[6:7]
1397; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
1398; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
1399; GFX9-O0-NEXT:    v_lshrrev_b64 v[18:19], s4, v[0:1]
1400; GFX9-O0-NEXT:    v_lshrrev_b64 v[22:23], s4, v[8:9]
1401; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v22
1402; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v18
1403; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v23
1404; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v19
1405; GFX9-O0-NEXT:    v_add_co_u32_e64 v18, s[6:7], v8, v9
1406; GFX9-O0-NEXT:    v_addc_co_u32_e64 v2, s[6:7], v2, v5, s[6:7]
1407; GFX9-O0-NEXT:    ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
1408; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v2
1409; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v20
1410; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v18
1411; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v21
1412; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v19
1413; GFX9-O0-NEXT:    v_add_co_u32_e64 v18, s[6:7], v8, v9
1414; GFX9-O0-NEXT:    v_addc_co_u32_e64 v2, s[6:7], v2, v5, s[6:7]
1415; GFX9-O0-NEXT:    ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
1416; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v2
1417; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v18
1418; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v16
1419; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v19
1420; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v17
1421; GFX9-O0-NEXT:    v_add_co_u32_e64 v8, s[6:7], v8, v9
1422; GFX9-O0-NEXT:    v_addc_co_u32_e64 v2, s[6:7], v2, v5, s[6:7]
1423; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
1424; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v2
1425; GFX9-O0-NEXT:    v_lshlrev_b64 v[0:1], s4, v[0:1]
1426; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v1
1427; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec
1428; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
1429; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s5
1430; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
1431; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v2
1432; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v15
1433; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v5
1434; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
1435; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v14
1436; GFX9-O0-NEXT:    v_or_b32_e64 v0, v0, v1
1437; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
1438; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
1439; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v8
1440; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v9
1441; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v0
1442; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v1
1443; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v3
1444; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v4
1445; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v12
1446; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v13
1447; GFX9-O0-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v9
1448; GFX9-O0-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v8, vcc
1449; GFX9-O0-NEXT:    v_subb_co_u32_e32 v4, vcc, v4, v5, vcc
1450; GFX9-O0-NEXT:    v_subb_co_u32_e32 v2, vcc, v1, v2, vcc
1451; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
1452; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
1453; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
1454; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v3
1455; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
1456; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
1457; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
1458; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v2
1459; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v5
1460; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v7
1461; GFX9-O0-NEXT:    v_xor_b32_e64 v3, v3, v2
1462; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v4
1463; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v6
1464; GFX9-O0-NEXT:    v_xor_b32_e64 v8, v5, v4
1465; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
1466; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v3
1467; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
1468; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v11
1469; GFX9-O0-NEXT:    v_xor_b32_e64 v3, v3, v6
1470; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
1471; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v10
1472; GFX9-O0-NEXT:    v_xor_b32_e64 v0, v0, v7
1473; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
1474; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v3
1475; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v0
1476; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v1
1477; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v8
1478; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v9
1479; GFX9-O0-NEXT:    v_sub_co_u32_e32 v5, vcc, v5, v7
1480; GFX9-O0-NEXT:    v_subb_co_u32_e32 v0, vcc, v0, v6, vcc
1481; GFX9-O0-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v4, vcc
1482; GFX9-O0-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
1483; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
1484; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
1485; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
1486; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v1
1487; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
1488; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
1489; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
1490; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v0
1491; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v5
1492; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
1493; GFX9-O0-NEXT:    v_lshrrev_b64 v[5:6], s4, v[5:6]
1494; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v5
1495; GFX9-O0-NEXT:    v_lshrrev_b64 v[3:4], s4, v[3:4]
1496; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec
1497; GFX9-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
1498; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload
1499; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
1500; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
1501; GFX9-O0-NEXT:    s_setpc_b64 s[30:31]
1502  %div = srem i128 %lhs, %rhs
1503  ret i128 %div
1504}
1505
1506define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
1507; GFX9-LABEL: v_urem_i128_vv:
1508; GFX9:       ; %bb.0: ; %_udiv-special-cases
1509; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1510; GFX9-NEXT:    v_or_b32_e32 v9, v5, v7
1511; GFX9-NEXT:    v_or_b32_e32 v8, v4, v6
1512; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[8:9]
1513; GFX9-NEXT:    v_or_b32_e32 v9, v1, v3
1514; GFX9-NEXT:    v_or_b32_e32 v8, v0, v2
1515; GFX9-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[8:9]
1516; GFX9-NEXT:    v_ffbh_u32_e32 v8, v6
1517; GFX9-NEXT:    v_add_u32_e32 v8, 32, v8
1518; GFX9-NEXT:    v_ffbh_u32_e32 v9, v7
1519; GFX9-NEXT:    v_min_u32_e32 v8, v8, v9
1520; GFX9-NEXT:    v_ffbh_u32_e32 v9, v4
1521; GFX9-NEXT:    v_add_u32_e32 v9, 32, v9
1522; GFX9-NEXT:    v_ffbh_u32_e32 v10, v5
1523; GFX9-NEXT:    v_min_u32_e32 v9, v9, v10
1524; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1525; GFX9-NEXT:    v_add_co_u32_e32 v9, vcc, 64, v9
1526; GFX9-NEXT:    v_addc_co_u32_e64 v10, s[6:7], 0, 0, vcc
1527; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
1528; GFX9-NEXT:    v_ffbh_u32_e32 v11, v3
1529; GFX9-NEXT:    v_cndmask_b32_e32 v8, v9, v8, vcc
1530; GFX9-NEXT:    v_ffbh_u32_e32 v9, v2
1531; GFX9-NEXT:    v_add_u32_e32 v9, 32, v9
1532; GFX9-NEXT:    v_min_u32_e32 v9, v9, v11
1533; GFX9-NEXT:    v_ffbh_u32_e32 v11, v0
1534; GFX9-NEXT:    v_add_u32_e32 v11, 32, v11
1535; GFX9-NEXT:    v_ffbh_u32_e32 v12, v1
1536; GFX9-NEXT:    v_min_u32_e32 v11, v11, v12
1537; GFX9-NEXT:    v_cndmask_b32_e64 v10, v10, 0, vcc
1538; GFX9-NEXT:    v_add_co_u32_e32 v11, vcc, 64, v11
1539; GFX9-NEXT:    v_addc_co_u32_e64 v12, s[6:7], 0, 0, vcc
1540; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
1541; GFX9-NEXT:    s_mov_b64 s[6:7], 0x7f
1542; GFX9-NEXT:    v_cndmask_b32_e32 v9, v11, v9, vcc
1543; GFX9-NEXT:    v_cndmask_b32_e64 v12, v12, 0, vcc
1544; GFX9-NEXT:    v_sub_co_u32_e32 v8, vcc, v8, v9
1545; GFX9-NEXT:    v_subb_co_u32_e32 v9, vcc, v10, v12, vcc
1546; GFX9-NEXT:    v_mov_b32_e32 v11, 0
1547; GFX9-NEXT:    v_subbrev_co_u32_e32 v10, vcc, 0, v11, vcc
1548; GFX9-NEXT:    v_subbrev_co_u32_e32 v11, vcc, 0, v11, vcc
1549; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9]
1550; GFX9-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1551; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
1552; GFX9-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1553; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
1554; GFX9-NEXT:    v_cndmask_b32_e32 v12, v13, v12, vcc
1555; GFX9-NEXT:    v_and_b32_e32 v12, 1, v12
1556; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v12
1557; GFX9-NEXT:    v_xor_b32_e32 v12, 0x7f, v8
1558; GFX9-NEXT:    v_or_b32_e32 v13, v9, v11
1559; GFX9-NEXT:    v_or_b32_e32 v12, v12, v10
1560; GFX9-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
1561; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[12:13]
1562; GFX9-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
1563; GFX9-NEXT:    v_cndmask_b32_e64 v15, v3, 0, s[4:5]
1564; GFX9-NEXT:    v_cndmask_b32_e64 v14, v2, 0, s[4:5]
1565; GFX9-NEXT:    v_cndmask_b32_e64 v13, v1, 0, s[4:5]
1566; GFX9-NEXT:    v_cndmask_b32_e64 v12, v0, 0, s[4:5]
1567; GFX9-NEXT:    s_and_b64 s[4:5], s[6:7], vcc
1568; GFX9-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
1569; GFX9-NEXT:    s_cbranch_execz .LBB1_6
1570; GFX9-NEXT:  ; %bb.1: ; %udiv-bb1
1571; GFX9-NEXT:    v_add_co_u32_e32 v22, vcc, 1, v8
1572; GFX9-NEXT:    v_addc_co_u32_e32 v23, vcc, 0, v9, vcc
1573; GFX9-NEXT:    v_addc_co_u32_e32 v24, vcc, 0, v10, vcc
1574; GFX9-NEXT:    v_sub_u32_e32 v15, 0x7f, v8
1575; GFX9-NEXT:    v_addc_co_u32_e32 v25, vcc, 0, v11, vcc
1576; GFX9-NEXT:    v_sub_u32_e32 v13, 64, v15
1577; GFX9-NEXT:    v_or_b32_e32 v10, v23, v25
1578; GFX9-NEXT:    v_or_b32_e32 v9, v22, v24
1579; GFX9-NEXT:    v_lshlrev_b64 v[11:12], v15, v[2:3]
1580; GFX9-NEXT:    v_lshrrev_b64 v[13:14], v13, v[0:1]
1581; GFX9-NEXT:    v_sub_u32_e32 v8, 63, v8
1582; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[9:10]
1583; GFX9-NEXT:    v_lshlrev_b64 v[8:9], v8, v[0:1]
1584; GFX9-NEXT:    v_or_b32_e32 v10, v12, v14
1585; GFX9-NEXT:    v_or_b32_e32 v11, v11, v13
1586; GFX9-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v15
1587; GFX9-NEXT:    v_cndmask_b32_e64 v9, v9, v10, s[4:5]
1588; GFX9-NEXT:    v_cndmask_b32_e64 v8, v8, v11, s[4:5]
1589; GFX9-NEXT:    v_lshlrev_b64 v[10:11], v15, v[0:1]
1590; GFX9-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v15
1591; GFX9-NEXT:    v_mov_b32_e32 v12, 0
1592; GFX9-NEXT:    v_mov_b32_e32 v14, 0
1593; GFX9-NEXT:    v_cndmask_b32_e64 v9, v9, v3, s[6:7]
1594; GFX9-NEXT:    v_cndmask_b32_e64 v8, v8, v2, s[6:7]
1595; GFX9-NEXT:    v_cndmask_b32_e64 v11, 0, v11, s[4:5]
1596; GFX9-NEXT:    v_mov_b32_e32 v13, 0
1597; GFX9-NEXT:    v_mov_b32_e32 v15, 0
1598; GFX9-NEXT:    v_cndmask_b32_e64 v10, 0, v10, s[4:5]
1599; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1600; GFX9-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
1601; GFX9-NEXT:    s_cbranch_execz .LBB1_5
1602; GFX9-NEXT:  ; %bb.2: ; %udiv-preheader
1603; GFX9-NEXT:    v_sub_u32_e32 v14, 64, v22
1604; GFX9-NEXT:    v_lshrrev_b64 v[12:13], v22, v[0:1]
1605; GFX9-NEXT:    v_lshlrev_b64 v[14:15], v14, v[2:3]
1606; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v22
1607; GFX9-NEXT:    v_or_b32_e32 v14, v12, v14
1608; GFX9-NEXT:    v_subrev_u32_e32 v12, 64, v22
1609; GFX9-NEXT:    v_or_b32_e32 v15, v13, v15
1610; GFX9-NEXT:    v_lshrrev_b64 v[12:13], v12, v[2:3]
1611; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v22
1612; GFX9-NEXT:    v_cndmask_b32_e32 v13, v13, v15, vcc
1613; GFX9-NEXT:    v_cndmask_b32_e64 v17, v13, v1, s[4:5]
1614; GFX9-NEXT:    v_cndmask_b32_e32 v14, v12, v14, vcc
1615; GFX9-NEXT:    v_lshrrev_b64 v[12:13], v22, v[2:3]
1616; GFX9-NEXT:    v_cndmask_b32_e64 v16, v14, v0, s[4:5]
1617; GFX9-NEXT:    v_cndmask_b32_e32 v19, 0, v13, vcc
1618; GFX9-NEXT:    v_cndmask_b32_e32 v18, 0, v12, vcc
1619; GFX9-NEXT:    v_add_co_u32_e32 v26, vcc, -1, v4
1620; GFX9-NEXT:    v_addc_co_u32_e32 v27, vcc, -1, v5, vcc
1621; GFX9-NEXT:    v_addc_co_u32_e32 v28, vcc, -1, v6, vcc
1622; GFX9-NEXT:    v_mov_b32_e32 v20, 0
1623; GFX9-NEXT:    v_mov_b32_e32 v14, 0
1624; GFX9-NEXT:    v_addc_co_u32_e32 v29, vcc, -1, v7, vcc
1625; GFX9-NEXT:    s_mov_b64 s[4:5], 0
1626; GFX9-NEXT:    v_mov_b32_e32 v21, 0
1627; GFX9-NEXT:    v_mov_b32_e32 v15, 0
1628; GFX9-NEXT:    v_mov_b32_e32 v13, 0
1629; GFX9-NEXT:  .LBB1_3: ; %udiv-do-while
1630; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
1631; GFX9-NEXT:    v_lshlrev_b64 v[30:31], 1, v[10:11]
1632; GFX9-NEXT:    v_lshrrev_b32_e32 v12, 31, v11
1633; GFX9-NEXT:    v_or_b32_e32 v10, v20, v30
1634; GFX9-NEXT:    v_lshrrev_b32_e32 v20, 31, v17
1635; GFX9-NEXT:    v_lshlrev_b64 v[16:17], 1, v[16:17]
1636; GFX9-NEXT:    v_or_b32_e32 v11, v21, v31
1637; GFX9-NEXT:    v_lshlrev_b64 v[18:19], 1, v[18:19]
1638; GFX9-NEXT:    v_lshrrev_b32_e32 v21, 31, v9
1639; GFX9-NEXT:    v_or_b32_e32 v16, v16, v21
1640; GFX9-NEXT:    v_or_b32_e32 v18, v18, v20
1641; GFX9-NEXT:    v_sub_co_u32_e32 v20, vcc, v26, v16
1642; GFX9-NEXT:    v_subb_co_u32_e32 v20, vcc, v27, v17, vcc
1643; GFX9-NEXT:    v_subb_co_u32_e32 v20, vcc, v28, v18, vcc
1644; GFX9-NEXT:    v_subb_co_u32_e32 v20, vcc, v29, v19, vcc
1645; GFX9-NEXT:    v_ashrrev_i32_e32 v30, 31, v20
1646; GFX9-NEXT:    v_and_b32_e32 v20, v30, v4
1647; GFX9-NEXT:    v_lshlrev_b64 v[8:9], 1, v[8:9]
1648; GFX9-NEXT:    v_sub_co_u32_e32 v16, vcc, v16, v20
1649; GFX9-NEXT:    v_and_b32_e32 v20, v30, v5
1650; GFX9-NEXT:    v_subb_co_u32_e32 v17, vcc, v17, v20, vcc
1651; GFX9-NEXT:    v_or3_b32 v8, v8, v12, v14
1652; GFX9-NEXT:    v_and_b32_e32 v12, v30, v6
1653; GFX9-NEXT:    v_and_b32_e32 v20, v30, v7
1654; GFX9-NEXT:    v_subb_co_u32_e32 v18, vcc, v18, v12, vcc
1655; GFX9-NEXT:    v_subb_co_u32_e32 v19, vcc, v19, v20, vcc
1656; GFX9-NEXT:    v_add_co_u32_e32 v22, vcc, -1, v22
1657; GFX9-NEXT:    v_addc_co_u32_e32 v23, vcc, -1, v23, vcc
1658; GFX9-NEXT:    v_addc_co_u32_e32 v24, vcc, -1, v24, vcc
1659; GFX9-NEXT:    v_addc_co_u32_e32 v25, vcc, -1, v25, vcc
1660; GFX9-NEXT:    v_or_b32_e32 v20, v22, v24
1661; GFX9-NEXT:    v_or_b32_e32 v21, v23, v25
1662; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[20:21]
1663; GFX9-NEXT:    v_and_b32_e32 v12, 1, v30
1664; GFX9-NEXT:    v_mov_b32_e32 v21, v13
1665; GFX9-NEXT:    v_or3_b32 v9, v9, 0, v15
1666; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1667; GFX9-NEXT:    v_mov_b32_e32 v20, v12
1668; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1669; GFX9-NEXT:    s_cbranch_execnz .LBB1_3
1670; GFX9-NEXT:  ; %bb.4: ; %Flow
1671; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
1672; GFX9-NEXT:  .LBB1_5: ; %Flow2
1673; GFX9-NEXT:    s_or_b64 exec, exec, s[6:7]
1674; GFX9-NEXT:    v_lshlrev_b64 v[16:17], 1, v[10:11]
1675; GFX9-NEXT:    v_lshlrev_b64 v[8:9], 1, v[8:9]
1676; GFX9-NEXT:    v_lshrrev_b32_e32 v10, 31, v11
1677; GFX9-NEXT:    v_or3_b32 v15, v9, 0, v15
1678; GFX9-NEXT:    v_or3_b32 v14, v8, v10, v14
1679; GFX9-NEXT:    v_or_b32_e32 v13, v13, v17
1680; GFX9-NEXT:    v_or_b32_e32 v12, v12, v16
1681; GFX9-NEXT:  .LBB1_6: ; %Flow3
1682; GFX9-NEXT:    s_or_b64 exec, exec, s[8:9]
1683; GFX9-NEXT:    v_mul_lo_u32 v19, v12, v7
1684; GFX9-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v4, v12, 0
1685; GFX9-NEXT:    v_mov_b32_e32 v17, 0
1686; GFX9-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v12, v6, 0
1687; GFX9-NEXT:    v_mov_b32_e32 v16, v8
1688; GFX9-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v5, v12, v[16:17]
1689; GFX9-NEXT:    v_mul_lo_u32 v18, v13, v6
1690; GFX9-NEXT:    v_mul_lo_u32 v16, v15, v4
1691; GFX9-NEXT:    v_mov_b32_e32 v6, v12
1692; GFX9-NEXT:    v_mov_b32_e32 v12, v17
1693; GFX9-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v4, v13, v[11:12]
1694; GFX9-NEXT:    v_add3_u32 v10, v10, v19, v18
1695; GFX9-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v14, v4, v[9:10]
1696; GFX9-NEXT:    v_mov_b32_e32 v4, v12
1697; GFX9-NEXT:    v_mul_lo_u32 v10, v14, v5
1698; GFX9-NEXT:    v_add_co_u32_e32 v14, vcc, v6, v4
1699; GFX9-NEXT:    v_addc_co_u32_e64 v15, s[4:5], 0, 0, vcc
1700; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v5, v13, v[14:15]
1701; GFX9-NEXT:    v_add3_u32 v6, v16, v9, v10
1702; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v4, v8
1703; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v5, v6, vcc
1704; GFX9-NEXT:    v_mov_b32_e32 v6, v11
1705; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v7
1706; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v6, vcc
1707; GFX9-NEXT:    v_subb_co_u32_e32 v2, vcc, v2, v4, vcc
1708; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v5, vcc
1709; GFX9-NEXT:    s_setpc_b64 s[30:31]
1710;
1711; GFX9-O0-LABEL: v_urem_i128_vv:
1712; GFX9-O0:       ; %bb.0: ; %_udiv-special-cases
1713; GFX9-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1714; GFX9-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
1715; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
1716; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
1717; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v6
1718; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
1719; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v2
1720; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v0
1721; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
1722; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1723; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1724; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
1725; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v1
1726; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1727; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1728; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
1729; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v3
1730; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1731; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1732; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
1733; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v5
1734; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1735; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1736; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
1737; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v7
1738; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
1739; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
1740; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
1741; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
1742; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v12
1743; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v13
1744; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
1745; GFX9-O0-NEXT:    s_nop 0
1746; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
1747; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v8
1748; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v9
1749; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
1750; GFX9-O0-NEXT:    s_nop 0
1751; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
1752; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v10
1753; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v11
1754; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
1755; GFX9-O0-NEXT:    s_nop 0
1756; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
1757; GFX9-O0-NEXT:    s_waitcnt vmcnt(6)
1758; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
1759; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v0
1760; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
1761; GFX9-O0-NEXT:    s_nop 0
1762; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
1763; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v10
1764; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v11
1765; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
1766; GFX9-O0-NEXT:    s_nop 0
1767; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
1768; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
1769; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v0
1770; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
1771; GFX9-O0-NEXT:    s_nop 0
1772; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
1773; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v12
1774; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v13
1775; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
1776; GFX9-O0-NEXT:    s_nop 0
1777; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
1778; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v8
1779; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v9
1780; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
1781; GFX9-O0-NEXT:    s_nop 0
1782; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
1783; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v11
1784; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v1
1785; GFX9-O0-NEXT:    v_or_b32_e64 v2, v7, v6
1786; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
1787; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v0
1788; GFX9-O0-NEXT:    v_or_b32_e64 v0, v4, v5
1789; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
1790; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
1791; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
1792; GFX9-O0-NEXT:    ; implicit-def: $vgpr30 : SGPR spill to VGPR lane
1793; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 0
1794; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 1
1795; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[0:1], s[6:7]
1796; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v13
1797; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v9
1798; GFX9-O0-NEXT:    v_or_b32_e64 v14, v3, v1
1799; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v12
1800; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v8
1801; GFX9-O0-NEXT:    v_or_b32_e64 v8, v2, v0
1802; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
1803; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v14
1804; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
1805; GFX9-O0-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
1806; GFX9-O0-NEXT:    v_ffbh_u32_e64 v5, v5
1807; GFX9-O0-NEXT:    s_mov_b32 s9, 32
1808; GFX9-O0-NEXT:    v_add_u32_e64 v5, v5, s9
1809; GFX9-O0-NEXT:    v_ffbh_u32_e64 v6, v6
1810; GFX9-O0-NEXT:    v_min_u32_e64 v5, v5, v6
1811; GFX9-O0-NEXT:    s_mov_b32 s8, 0
1812; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
1813; GFX9-O0-NEXT:    v_mov_b32_e32 v8, s8
1814; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
1815; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v8
1816; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v6
1817; GFX9-O0-NEXT:    v_ffbh_u32_e64 v4, v4
1818; GFX9-O0-NEXT:    v_add_u32_e64 v4, v4, s9
1819; GFX9-O0-NEXT:    v_ffbh_u32_e64 v7, v7
1820; GFX9-O0-NEXT:    v_min_u32_e64 v14, v4, v7
1821; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
1822; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
1823; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
1824; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v4
1825; GFX9-O0-NEXT:    s_mov_b64 s[10:11], 64
1826; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v14
1827; GFX9-O0-NEXT:    s_mov_b32 s12, s10
1828; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v15
1829; GFX9-O0-NEXT:    s_mov_b32 s14, s11
1830; GFX9-O0-NEXT:    v_add_co_u32_e64 v7, s[12:13], v7, s12
1831; GFX9-O0-NEXT:    v_mov_b32_e32 v8, s14
1832; GFX9-O0-NEXT:    v_addc_co_u32_e64 v4, s[12:13], v4, v8, s[12:13]
1833; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
1834; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v4
1835; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
1836; GFX9-O0-NEXT:    s_mov_b64 s[12:13], s[6:7]
1837; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[12:13], v[10:11], s[12:13]
1838; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v9, s[12:13]
1839; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
1840; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
1841; GFX9-O0-NEXT:    v_cndmask_b32_e64 v8, v5, v6, s[12:13]
1842; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
1843; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
1844; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
1845; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v4
1846; GFX9-O0-NEXT:    v_ffbh_u32_e64 v4, v0
1847; GFX9-O0-NEXT:    v_add_u32_e64 v4, v4, s9
1848; GFX9-O0-NEXT:    v_ffbh_u32_e64 v5, v1
1849; GFX9-O0-NEXT:    v_min_u32_e64 v5, v4, v5
1850; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
1851; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
1852; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
1853; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
1854; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
1855; GFX9-O0-NEXT:    v_ffbh_u32_e64 v4, v2
1856; GFX9-O0-NEXT:    v_add_u32_e64 v4, v4, s9
1857; GFX9-O0-NEXT:    v_ffbh_u32_e64 v10, v3
1858; GFX9-O0-NEXT:    v_min_u32_e64 v14, v4, v10
1859; GFX9-O0-NEXT:    ; implicit-def: $sgpr9
1860; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
1861; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
1862; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v4
1863; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v14
1864; GFX9-O0-NEXT:    s_mov_b32 s8, s10
1865; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v15
1866; GFX9-O0-NEXT:    s_mov_b32 s10, s11
1867; GFX9-O0-NEXT:    v_add_co_u32_e64 v10, s[8:9], v10, s8
1868; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s10
1869; GFX9-O0-NEXT:    v_addc_co_u32_e64 v4, s[8:9], v4, v11, s[8:9]
1870; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
1871; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v4
1872; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v11
1873; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
1874; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[8:9], v[12:13], s[8:9]
1875; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[8:9]
1876; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
1877; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
1878; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[8:9]
1879; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
1880; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
1881; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
1882; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
1883; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
1884; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
1885; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec
1886; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
1887; GFX9-O0-NEXT:    s_mov_b32 s10, s6
1888; GFX9-O0-NEXT:    s_mov_b32 s11, s7
1889; GFX9-O0-NEXT:    v_sub_co_u32_e32 v4, vcc, v4, v7
1890; GFX9-O0-NEXT:    v_subb_co_u32_e32 v8, vcc, v5, v6, vcc
1891; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s10
1892; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s10
1893; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v5, v6, vcc
1894; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s11
1895; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s11
1896; GFX9-O0-NEXT:    v_subb_co_u32_e32 v6, vcc, v5, v6, vcc
1897; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
1898; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
1899; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
1900; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v8
1901; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
1902; GFX9-O0-NEXT:    s_nop 0
1903; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
1904; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
1905; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
1906; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
1907; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v6
1908; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
1909; GFX9-O0-NEXT:    s_nop 0
1910; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
1911; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
1912; GFX9-O0-NEXT:    s_mov_b64 s[12:13], 0x7f
1913; GFX9-O0-NEXT:    v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
1914; GFX9-O0-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[14:15]
1915; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
1916; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[14:15]
1917; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v9, s[8:9]
1918; GFX9-O0-NEXT:    v_and_b32_e64 v6, 1, v6
1919; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[8:9], v6, 1
1920; GFX9-O0-NEXT:    s_or_b64 s[8:9], s[4:5], s[8:9]
1921; GFX9-O0-NEXT:    s_mov_b64 s[4:5], -1
1922; GFX9-O0-NEXT:    s_xor_b64 s[4:5], s[8:9], s[4:5]
1923; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
1924; GFX9-O0-NEXT:    s_mov_b32 s14, s13
1925; GFX9-O0-NEXT:    v_xor_b32_e64 v6, v6, s14
1926; GFX9-O0-NEXT:    ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
1927; GFX9-O0-NEXT:    v_xor_b32_e64 v4, v4, s12
1928; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
1929; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v6
1930; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
1931; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v8
1932; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v9
1933; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
1934; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
1935; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
1936; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
1937; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v6
1938; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7]
1939; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s11
1940; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v1, v4, s[8:9]
1941; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s10
1942; GFX9-O0-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s[8:9]
1943; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
1944; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
1945; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
1946; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v4
1947; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s11
1948; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v3, v4, s[8:9]
1949; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s10
1950; GFX9-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[8:9]
1951; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
1952; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
1953; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
1954; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v4
1955; GFX9-O0-NEXT:    s_and_b64 s[6:7], s[4:5], s[6:7]
1956; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
1957; GFX9-O0-NEXT:    s_nop 0
1958; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
1959; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
1960; GFX9-O0-NEXT:    s_nop 0
1961; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
1962; GFX9-O0-NEXT:    s_mov_b64 s[4:5], exec
1963; GFX9-O0-NEXT:    v_writelane_b32 v30, s4, 2
1964; GFX9-O0-NEXT:    v_writelane_b32 v30, s5, 3
1965; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
1966; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
1967; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
1968; GFX9-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
1969; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
1970; GFX9-O0-NEXT:    s_cbranch_execz .LBB1_3
1971; GFX9-O0-NEXT:    s_branch .LBB1_8
1972; GFX9-O0-NEXT:  .LBB1_1: ; %Flow
1973; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
1974; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
1975; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
1976; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
1977; GFX9-O0-NEXT:    v_readlane_b32 s4, v30, 4
1978; GFX9-O0-NEXT:    v_readlane_b32 s5, v30, 5
1979; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
1980; GFX9-O0-NEXT:  ; %bb.2: ; %Flow
1981; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
1982; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
1983; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
1984; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
1985; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
1986; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
1987; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
1988; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
1989; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
1990; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
1991; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
1992; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
1993; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
1994; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
1995; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
1996; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
1997; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
1998; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
1999; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
2000; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
2001; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
2002; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
2003; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
2004; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
2005; GFX9-O0-NEXT:    s_branch .LBB1_5
2006; GFX9-O0-NEXT:  .LBB1_3: ; %Flow2
2007; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
2008; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
2009; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
2010; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
2011; GFX9-O0-NEXT:    v_readlane_b32 s4, v30, 2
2012; GFX9-O0-NEXT:    v_readlane_b32 s5, v30, 3
2013; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
2014; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
2015; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
2016; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
2017; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
2018; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
2019; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
2020; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
2021; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
2022; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
2023; GFX9-O0-NEXT:    s_nop 0
2024; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
2025; GFX9-O0-NEXT:    s_branch .LBB1_9
2026; GFX9-O0-NEXT:  .LBB1_4: ; %udiv-loop-exit
2027; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
2028; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
2029; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
2030; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
2031; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
2032; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
2033; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
2034; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
2035; GFX9-O0-NEXT:    s_mov_b32 s4, 1
2036; GFX9-O0-NEXT:    s_waitcnt vmcnt(2)
2037; GFX9-O0-NEXT:    v_lshlrev_b64 v[2:3], s4, v[0:1]
2038; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
2039; GFX9-O0-NEXT:    v_lshlrev_b64 v[9:10], s4, v[9:10]
2040; GFX9-O0-NEXT:    s_mov_b32 s4, 63
2041; GFX9-O0-NEXT:    v_lshrrev_b64 v[0:1], s4, v[0:1]
2042; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v1
2043; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v10
2044; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v8
2045; GFX9-O0-NEXT:    v_or3_b32 v4, v4, v11, v12
2046; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
2047; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v9
2048; GFX9-O0-NEXT:    v_or3_b32 v0, v0, v1, v7
2049; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
2050; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v4
2051; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v3
2052; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v6
2053; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v7
2054; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
2055; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
2056; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
2057; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
2058; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v4
2059; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
2060; GFX9-O0-NEXT:    s_nop 0
2061; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
2062; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
2063; GFX9-O0-NEXT:    s_nop 0
2064; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
2065; GFX9-O0-NEXT:    s_branch .LBB1_3
2066; GFX9-O0-NEXT:  .LBB1_5: ; %Flow1
2067; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
2068; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
2069; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
2070; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
2071; GFX9-O0-NEXT:    v_readlane_b32 s4, v30, 6
2072; GFX9-O0-NEXT:    v_readlane_b32 s5, v30, 7
2073; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
2074; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
2075; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
2076; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
2077; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
2078; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
2079; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
2080; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
2081; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
2082; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
2083; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
2084; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
2085; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
2086; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
2087; GFX9-O0-NEXT:    s_nop 0
2088; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
2089; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
2090; GFX9-O0-NEXT:    s_nop 0
2091; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
2092; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
2093; GFX9-O0-NEXT:    s_nop 0
2094; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
2095; GFX9-O0-NEXT:    s_branch .LBB1_4
2096; GFX9-O0-NEXT:  .LBB1_6: ; %udiv-do-while
2097; GFX9-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
2098; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
2099; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
2100; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
2101; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
2102; GFX9-O0-NEXT:    v_readlane_b32 s6, v30, 8
2103; GFX9-O0-NEXT:    v_readlane_b32 s7, v30, 9
2104; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
2105; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
2106; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
2107; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
2108; GFX9-O0-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
2109; GFX9-O0-NEXT:    buffer_load_dword v23, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
2110; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
2111; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
2112; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
2113; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
2114; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
2115; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
2116; GFX9-O0-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
2117; GFX9-O0-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
2118; GFX9-O0-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
2119; GFX9-O0-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
2120; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
2121; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
2122; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
2123; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
2124; GFX9-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
2125; GFX9-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
2126; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
2127; GFX9-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
2128; GFX9-O0-NEXT:    s_mov_b32 s4, 63
2129; GFX9-O0-NEXT:    s_waitcnt vmcnt(16)
2130; GFX9-O0-NEXT:    v_lshrrev_b64 v[28:29], s4, v[2:3]
2131; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v29
2132; GFX9-O0-NEXT:    s_mov_b32 s5, 1
2133; GFX9-O0-NEXT:    v_lshlrev_b64 v[22:23], s5, v[22:23]
2134; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v23
2135; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
2136; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v28
2137; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v22
2138; GFX9-O0-NEXT:    v_or_b32_e64 v22, v5, v10
2139; GFX9-O0-NEXT:    ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
2140; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v4
2141; GFX9-O0-NEXT:    v_lshlrev_b64 v[28:29], s5, v[2:3]
2142; GFX9-O0-NEXT:    v_lshrrev_b64 v[4:5], s4, v[6:7]
2143; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v29
2144; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v5
2145; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
2146; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v28
2147; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
2148; GFX9-O0-NEXT:    v_or_b32_e64 v4, v3, v4
2149; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
2150; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v2
2151; GFX9-O0-NEXT:    v_lshlrev_b64 v[2:3], s5, v[0:1]
2152; GFX9-O0-NEXT:    v_lshlrev_b64 v[28:29], s5, v[6:7]
2153; GFX9-O0-NEXT:    v_lshrrev_b64 v[0:1], s4, v[0:1]
2154; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v1
2155; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v29
2156; GFX9-O0-NEXT:    s_waitcnt vmcnt(10)
2157; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v27
2158; GFX9-O0-NEXT:    v_or3_b32 v6, v6, v7, v10
2159; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
2160; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v28
2161; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v26
2162; GFX9-O0-NEXT:    v_or3_b32 v0, v0, v1, v7
2163; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
2164; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v6
2165; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v3
2166; GFX9-O0-NEXT:    s_waitcnt vmcnt(8)
2167; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v25
2168; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v7
2169; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
2170; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v24
2171; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
2172; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
2173; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v6
2174; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
2175; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v5
2176; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v22
2177; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v23
2178; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
2179; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v11
2180; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v14
2181; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v15
2182; GFX9-O0-NEXT:    v_sub_co_u32_e32 v13, vcc, v13, v6
2183; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
2184; GFX9-O0-NEXT:    v_subb_co_u32_e32 v12, vcc, v12, v10, vcc
2185; GFX9-O0-NEXT:    v_subb_co_u32_e32 v11, vcc, v11, v4, vcc
2186; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v7, v5, vcc
2187; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
2188; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
2189; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
2190; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v7
2191; GFX9-O0-NEXT:    v_ashrrev_i64 v[13:14], s4, v[11:12]
2192; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v14
2193; GFX9-O0-NEXT:    s_mov_b64 s[4:5], 1
2194; GFX9-O0-NEXT:    s_mov_b32 s8, s5
2195; GFX9-O0-NEXT:    v_and_b32_e64 v12, v7, s8
2196; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v13
2197; GFX9-O0-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
2198; GFX9-O0-NEXT:    v_and_b32_e64 v14, v11, s4
2199; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
2200; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v12
2201; GFX9-O0-NEXT:    v_mov_b32_e32 v12, 0
2202; GFX9-O0-NEXT:    v_mov_b32_e32 v13, 0
2203; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v21
2204; GFX9-O0-NEXT:    v_and_b32_e64 v22, v7, v22
2205; GFX9-O0-NEXT:    v_and_b32_e64 v20, v11, v20
2206; GFX9-O0-NEXT:    ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
2207; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v22
2208; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v19
2209; GFX9-O0-NEXT:    v_and_b32_e64 v7, v7, v22
2210; GFX9-O0-NEXT:    v_and_b32_e64 v22, v11, v18
2211; GFX9-O0-NEXT:    ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
2212; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v7
2213; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v22
2214; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v23
2215; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v20
2216; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v21
2217; GFX9-O0-NEXT:    v_sub_co_u32_e32 v6, vcc, v6, v19
2218; GFX9-O0-NEXT:    v_subb_co_u32_e32 v10, vcc, v10, v18, vcc
2219; GFX9-O0-NEXT:    v_subb_co_u32_e32 v4, vcc, v4, v11, vcc
2220; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v5, v7, vcc
2221; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2222; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2223; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
2224; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
2225; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2226; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2227; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
2228; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v10
2229; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v8
2230; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 killed $vgpr8_vgpr9 killed $exec
2231; GFX9-O0-NEXT:    s_mov_b64 s[8:9], -1
2232; GFX9-O0-NEXT:    s_mov_b32 s5, s8
2233; GFX9-O0-NEXT:    s_mov_b32 s4, s9
2234; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v16
2235; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v17
2236; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s5
2237; GFX9-O0-NEXT:    v_add_co_u32_e32 v19, vcc, v11, v16
2238; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s4
2239; GFX9-O0-NEXT:    v_addc_co_u32_e32 v9, vcc, v9, v11, vcc
2240; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s5
2241; GFX9-O0-NEXT:    v_addc_co_u32_e32 v16, vcc, v10, v11, vcc
2242; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s4
2243; GFX9-O0-NEXT:    v_addc_co_u32_e32 v8, vcc, v8, v10, vcc
2244; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2245; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2246; GFX9-O0-NEXT:    ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
2247; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v9
2248; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2249; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2250; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
2251; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v8
2252; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v16
2253; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v17
2254; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v19
2255; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v20
2256; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v17
2257; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v20
2258; GFX9-O0-NEXT:    v_or_b32_e64 v18, v18, v21
2259; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v16
2260; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v19
2261; GFX9-O0-NEXT:    v_or_b32_e64 v16, v16, v17
2262; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
2263; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v18
2264; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[16:17], v[12:13]
2265; GFX9-O0-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
2266; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v3
2267; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v2
2268; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
2269; GFX9-O0-NEXT:    s_nop 0
2270; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
2271; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v1
2272; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v0
2273; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
2274; GFX9-O0-NEXT:    s_nop 0
2275; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
2276; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v15
2277; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v14
2278; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
2279; GFX9-O0-NEXT:    s_nop 0
2280; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
2281; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v13
2282; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v12
2283; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
2284; GFX9-O0-NEXT:    s_nop 0
2285; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
2286; GFX9-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
2287; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 4
2288; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 5
2289; GFX9-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
2290; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 8
2291; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 9
2292; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
2293; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
2294; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
2295; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
2296; GFX9-O0-NEXT:    s_nop 0
2297; GFX9-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
2298; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
2299; GFX9-O0-NEXT:    s_nop 0
2300; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
2301; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
2302; GFX9-O0-NEXT:    s_nop 0
2303; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
2304; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
2305; GFX9-O0-NEXT:    s_nop 0
2306; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
2307; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
2308; GFX9-O0-NEXT:    s_nop 0
2309; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
2310; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
2311; GFX9-O0-NEXT:    s_nop 0
2312; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
2313; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
2314; GFX9-O0-NEXT:    s_nop 0
2315; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
2316; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
2317; GFX9-O0-NEXT:    s_nop 0
2318; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
2319; GFX9-O0-NEXT:    s_andn2_b64 exec, exec, s[4:5]
2320; GFX9-O0-NEXT:    s_cbranch_execnz .LBB1_6
2321; GFX9-O0-NEXT:    s_branch .LBB1_1
2322; GFX9-O0-NEXT:  .LBB1_7: ; %udiv-preheader
2323; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
2324; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
2325; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
2326; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
2327; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
2328; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
2329; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
2330; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
2331; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
2332; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
2333; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
2334; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
2335; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
2336; GFX9-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
2337; GFX9-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
2338; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
2339; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
2340; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
2341; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
2342; GFX9-O0-NEXT:    s_waitcnt vmcnt(9)
2343; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v10
2344; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
2345; GFX9-O0-NEXT:    v_lshrrev_b64 v[6:7], v4, v[20:21]
2346; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
2347; GFX9-O0-NEXT:    s_mov_b32 s6, 64
2348; GFX9-O0-NEXT:    v_sub_u32_e64 v12, s6, v4
2349; GFX9-O0-NEXT:    v_lshlrev_b64 v[22:23], v12, v[18:19]
2350; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v23
2351; GFX9-O0-NEXT:    v_or_b32_e64 v5, v5, v12
2352; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
2353; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v22
2354; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v7
2355; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
2356; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
2357; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v7
2358; GFX9-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v4, s6
2359; GFX9-O0-NEXT:    v_sub_u32_e64 v5, v4, s6
2360; GFX9-O0-NEXT:    v_lshrrev_b64 v[22:23], v5, v[18:19]
2361; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v23
2362; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v12, s[4:5]
2363; GFX9-O0-NEXT:    s_mov_b32 s6, 0
2364; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v4, s6
2365; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v21
2366; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v12, s[6:7]
2367; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
2368; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v22
2369; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[4:5]
2370; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v20
2371; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[6:7]
2372; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
2373; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
2374; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
2375; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
2376; GFX9-O0-NEXT:    v_lshrrev_b64 v[4:5], v4, v[18:19]
2377; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v5
2378; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
2379; GFX9-O0-NEXT:    s_mov_b32 s8, s7
2380; GFX9-O0-NEXT:    v_mov_b32_e32 v12, s8
2381; GFX9-O0-NEXT:    v_cndmask_b32_e64 v12, v12, v15, s[4:5]
2382; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v4
2383; GFX9-O0-NEXT:    s_mov_b32 s8, s6
2384; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
2385; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[4:5]
2386; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2387; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2388; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
2389; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v12
2390; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v13
2391; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v14
2392; GFX9-O0-NEXT:    s_mov_b64 s[8:9], -1
2393; GFX9-O0-NEXT:    s_mov_b32 s5, s8
2394; GFX9-O0-NEXT:    s_mov_b32 s4, s9
2395; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v16
2396; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v17
2397; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s5
2398; GFX9-O0-NEXT:    v_add_co_u32_e32 v12, vcc, v12, v16
2399; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s4
2400; GFX9-O0-NEXT:    v_addc_co_u32_e32 v16, vcc, v15, v16, vcc
2401; GFX9-O0-NEXT:    v_mov_b32_e32 v15, s5
2402; GFX9-O0-NEXT:    v_addc_co_u32_e32 v14, vcc, v14, v15, vcc
2403; GFX9-O0-NEXT:    v_mov_b32_e32 v15, s4
2404; GFX9-O0-NEXT:    v_addc_co_u32_e32 v13, vcc, v13, v15, vcc
2405; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2406; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2407; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
2408; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v13
2409; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2410; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2411; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
2412; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v16
2413; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
2414; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
2415; GFX9-O0-NEXT:    s_nop 0
2416; GFX9-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
2417; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
2418; GFX9-O0-NEXT:    s_nop 0
2419; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
2420; GFX9-O0-NEXT:    s_mov_b64 s[4:5], s[6:7]
2421; GFX9-O0-NEXT:    v_mov_b32_e32 v15, s9
2422; GFX9-O0-NEXT:    v_mov_b32_e32 v14, s8
2423; GFX9-O0-NEXT:    v_mov_b32_e32 v13, s7
2424; GFX9-O0-NEXT:    v_mov_b32_e32 v12, s6
2425; GFX9-O0-NEXT:    v_writelane_b32 v30, s4, 8
2426; GFX9-O0-NEXT:    v_writelane_b32 v30, s5, 9
2427; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
2428; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
2429; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
2430; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
2431; GFX9-O0-NEXT:    s_nop 0
2432; GFX9-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
2433; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
2434; GFX9-O0-NEXT:    s_nop 0
2435; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
2436; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
2437; GFX9-O0-NEXT:    s_nop 0
2438; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
2439; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
2440; GFX9-O0-NEXT:    s_nop 0
2441; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
2442; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
2443; GFX9-O0-NEXT:    s_nop 0
2444; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
2445; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
2446; GFX9-O0-NEXT:    s_nop 0
2447; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
2448; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
2449; GFX9-O0-NEXT:    s_nop 0
2450; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
2451; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
2452; GFX9-O0-NEXT:    s_nop 0
2453; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
2454; GFX9-O0-NEXT:    s_branch .LBB1_6
2455; GFX9-O0-NEXT:  .LBB1_8: ; %udiv-bb1
2456; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
2457; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
2458; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
2459; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
2460; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
2461; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
2462; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
2463; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
2464; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
2465; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
2466; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
2467; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 1
2468; GFX9-O0-NEXT:    s_mov_b32 s5, s6
2469; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
2470; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v0
2471; GFX9-O0-NEXT:    s_mov_b32 s4, s7
2472; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
2473; GFX9-O0-NEXT:    s_mov_b32 s8, s6
2474; GFX9-O0-NEXT:    s_mov_b32 s9, s7
2475; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v4
2476; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
2477; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s5
2478; GFX9-O0-NEXT:    v_add_co_u32_e32 v8, vcc, v3, v4
2479; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s4
2480; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
2481; GFX9-O0-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v4, vcc
2482; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
2483; GFX9-O0-NEXT:    v_addc_co_u32_e32 v0, vcc, v0, v4, vcc
2484; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s9
2485; GFX9-O0-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v4, vcc
2486; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2487; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2488; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
2489; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v1
2490; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2491; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2492; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
2493; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
2494; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v1
2495; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v0
2496; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
2497; GFX9-O0-NEXT:    s_nop 0
2498; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
2499; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
2500; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
2501; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
2502; GFX9-O0-NEXT:    s_nop 0
2503; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
2504; GFX9-O0-NEXT:    s_mov_b32 s4, 0x7f
2505; GFX9-O0-NEXT:    v_sub_u32_e64 v2, s4, v3
2506; GFX9-O0-NEXT:    v_lshlrev_b64 v[4:5], v2, v[10:11]
2507; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v5
2508; GFX9-O0-NEXT:    s_mov_b32 s4, 64
2509; GFX9-O0-NEXT:    v_sub_u32_e64 v13, s4, v2
2510; GFX9-O0-NEXT:    v_lshrrev_b64 v[13:14], v13, v[6:7]
2511; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v14
2512; GFX9-O0-NEXT:    v_or_b32_e64 v12, v12, v15
2513; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
2514; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v13
2515; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
2516; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
2517; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v12
2518; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v5
2519; GFX9-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v2, s4
2520; GFX9-O0-NEXT:    s_mov_b32 s10, 63
2521; GFX9-O0-NEXT:    v_sub_u32_e64 v3, s10, v3
2522; GFX9-O0-NEXT:    v_lshlrev_b64 v[12:13], v3, v[6:7]
2523; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v13
2524; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v14, s[4:5]
2525; GFX9-O0-NEXT:    s_mov_b32 s10, 0
2526; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[10:11], v2, s10
2527; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v11
2528; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v14, s[10:11]
2529; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v4
2530; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v12
2531; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[4:5]
2532; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
2533; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[10:11]
2534; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
2535; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
2536; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
2537; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v3
2538; GFX9-O0-NEXT:    v_lshlrev_b64 v[6:7], v2, v[6:7]
2539; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v7
2540; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s9
2541; GFX9-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[4:5]
2542; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
2543; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s8
2544; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v3, v6, s[4:5]
2545; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2546; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2547; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
2548; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v2
2549; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
2550; GFX9-O0-NEXT:    s_nop 0
2551; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
2552; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
2553; GFX9-O0-NEXT:    s_nop 0
2554; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
2555; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
2556; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v9
2557; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
2558; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
2559; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v8
2560; GFX9-O0-NEXT:    v_or_b32_e64 v0, v0, v1
2561; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
2562; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
2563; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[4:5], v[0:1], s[6:7]
2564; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
2565; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s8
2566; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s9
2567; GFX9-O0-NEXT:    v_mov_b32_e32 v0, s6
2568; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s7
2569; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
2570; GFX9-O0-NEXT:    s_nop 0
2571; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
2572; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
2573; GFX9-O0-NEXT:    s_nop 0
2574; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
2575; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
2576; GFX9-O0-NEXT:    s_nop 0
2577; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
2578; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
2579; GFX9-O0-NEXT:    s_nop 0
2580; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
2581; GFX9-O0-NEXT:    s_mov_b64 s[6:7], exec
2582; GFX9-O0-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
2583; GFX9-O0-NEXT:    s_xor_b64 s[6:7], s[4:5], s[6:7]
2584; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 6
2585; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 7
2586; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
2587; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
2588; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
2589; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
2590; GFX9-O0-NEXT:    s_cbranch_execz .LBB1_5
2591; GFX9-O0-NEXT:    s_branch .LBB1_7
2592; GFX9-O0-NEXT:  .LBB1_9: ; %udiv-end
2593; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
2594; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
2595; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
2596; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
2597; GFX9-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
2598; GFX9-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
2599; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
2600; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
2601; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
2602; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
2603; GFX9-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
2604; GFX9-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
2605; GFX9-O0-NEXT:    s_mov_b32 s4, 32
2606; GFX9-O0-NEXT:    s_waitcnt vmcnt(2)
2607; GFX9-O0-NEXT:    v_lshrrev_b64 v[2:3], s4, v[6:7]
2608; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v2
2609; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
2610; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v12
2611; GFX9-O0-NEXT:    v_mul_lo_u32 v4, v5, v2
2612; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
2613; GFX9-O0-NEXT:    v_lshrrev_b64 v[12:13], s4, v[12:13]
2614; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v12
2615; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
2616; GFX9-O0-NEXT:    v_mul_lo_u32 v3, v6, v3
2617; GFX9-O0-NEXT:    v_mad_u64_u32 v[12:13], s[6:7], v6, v2, 0
2618; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v13
2619; GFX9-O0-NEXT:    v_add3_u32 v2, v2, v3, v4
2620; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
2621; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
2622; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
2623; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s5
2624; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
2625; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v4
2626; GFX9-O0-NEXT:    v_lshlrev_b64 v[3:4], s4, v[2:3]
2627; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v4
2628; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 killed $vgpr12_vgpr13 killed $exec
2629; GFX9-O0-NEXT:    s_mov_b32 s5, 0
2630; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
2631; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s5
2632; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
2633; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v2
2634; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v13
2635; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v7
2636; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v3
2637; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v12
2638; GFX9-O0-NEXT:    v_or_b32_e64 v12, v3, v4
2639; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
2640; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v2
2641; GFX9-O0-NEXT:    v_lshrrev_b64 v[2:3], s4, v[14:15]
2642; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v2
2643; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v10
2644; GFX9-O0-NEXT:    v_mul_lo_u32 v3, v2, v7
2645; GFX9-O0-NEXT:    v_lshrrev_b64 v[10:11], s4, v[10:11]
2646; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 killed $vgpr10_vgpr11 killed $exec
2647; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v14
2648; GFX9-O0-NEXT:    v_mul_lo_u32 v10, v10, v4
2649; GFX9-O0-NEXT:    v_mad_u64_u32 v[14:15], s[6:7], v2, v4, 0
2650; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v15
2651; GFX9-O0-NEXT:    v_add3_u32 v2, v2, v3, v10
2652; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
2653; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
2654; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
2655; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s6
2656; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
2657; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v10
2658; GFX9-O0-NEXT:    v_lshlrev_b64 v[2:3], s4, v[2:3]
2659; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v3
2660; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec
2661; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
2662; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s5
2663; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
2664; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v10
2665; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v15
2666; GFX9-O0-NEXT:    v_or_b32_e64 v10, v10, v11
2667; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
2668; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v14
2669; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
2670; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
2671; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v10
2672; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v2
2673; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v12
2674; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
2675; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v13
2676; GFX9-O0-NEXT:    v_add_co_u32_e64 v12, s[6:7], v10, v11
2677; GFX9-O0-NEXT:    v_addc_co_u32_e64 v2, s[6:7], v2, v3, s[6:7]
2678; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
2679; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v2
2680; GFX9-O0-NEXT:    v_mad_u64_u32 v[14:15], s[6:7], v7, v5, 0
2681; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v14
2682; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
2683; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s5
2684; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
2685; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v10
2686; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v3
2687; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v15
2688; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
2689; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
2690; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
2691; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s6
2692; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
2693; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v11
2694; GFX9-O0-NEXT:    v_lshlrev_b64 v[14:15], s4, v[14:15]
2695; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v15
2696; GFX9-O0-NEXT:    v_or_b32_e64 v10, v10, v11
2697; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 killed $vgpr2_vgpr3 killed $exec
2698; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v14
2699; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
2700; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
2701; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v10
2702; GFX9-O0-NEXT:    v_mad_u64_u32 v[14:15], s[6:7], v7, v6, 0
2703; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v14
2704; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
2705; GFX9-O0-NEXT:    v_mov_b32_e32 v7, s5
2706; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
2707; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v7
2708; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v11
2709; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v15
2710; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
2711; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
2712; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
2713; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s6
2714; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
2715; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v16
2716; GFX9-O0-NEXT:    v_lshlrev_b64 v[14:15], s4, v[14:15]
2717; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v15
2718; GFX9-O0-NEXT:    v_or_b32_e64 v7, v7, v16
2719; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 killed $vgpr10_vgpr11 killed $exec
2720; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v14
2721; GFX9-O0-NEXT:    v_or_b32_e64 v18, v10, v11
2722; GFX9-O0-NEXT:    ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
2723; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v7
2724; GFX9-O0-NEXT:    v_mad_u64_u32 v[10:11], s[6:7], v4, v6, 0
2725; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v11
2726; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
2727; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s5
2728; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
2729; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v6
2730; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v18
2731; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v16
2732; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v19
2733; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v17
2734; GFX9-O0-NEXT:    v_add_co_u32_e64 v6, s[6:7], v6, v15
2735; GFX9-O0-NEXT:    v_addc_co_u32_e64 v14, s[6:7], v7, v14, s[6:7]
2736; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
2737; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v14
2738; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v7
2739; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0xffffffff
2740; GFX9-O0-NEXT:    s_mov_b32 s8, s7
2741; GFX9-O0-NEXT:    v_and_b32_e64 v14, v14, s8
2742; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v6
2743; GFX9-O0-NEXT:    ; kill: def $sgpr6 killed $sgpr6 killed $sgpr6_sgpr7
2744; GFX9-O0-NEXT:    v_and_b32_e64 v16, v15, s6
2745; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
2746; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v14
2747; GFX9-O0-NEXT:    v_mad_u64_u32 v[14:15], s[6:7], v4, v5, 0
2748; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v14
2749; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
2750; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s5
2751; GFX9-O0-NEXT:    ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
2752; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v4
2753; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v19
2754; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v15
2755; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
2756; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
2757; GFX9-O0-NEXT:    ; implicit-def: $sgpr7
2758; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s6
2759; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
2760; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v5
2761; GFX9-O0-NEXT:    v_lshlrev_b64 v[14:15], s4, v[14:15]
2762; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v15
2763; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
2764; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v18
2765; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec
2766; GFX9-O0-NEXT:    v_or_b32_e64 v18, v5, v14
2767; GFX9-O0-NEXT:    ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
2768; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v4
2769; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v18
2770; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v16
2771; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v19
2772; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v17
2773; GFX9-O0-NEXT:    v_add_co_u32_e64 v4, s[6:7], v4, v15
2774; GFX9-O0-NEXT:    v_addc_co_u32_e64 v14, s[6:7], v5, v14, s[6:7]
2775; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
2776; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v14
2777; GFX9-O0-NEXT:    v_lshrrev_b64 v[16:17], s4, v[4:5]
2778; GFX9-O0-NEXT:    v_lshrrev_b64 v[6:7], s4, v[6:7]
2779; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v6
2780; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v16
2781; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v7
2782; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v17
2783; GFX9-O0-NEXT:    v_add_co_u32_e64 v14, s[6:7], v14, v15
2784; GFX9-O0-NEXT:    v_addc_co_u32_e64 v6, s[6:7], v6, v7, s[6:7]
2785; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
2786; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v6
2787; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v2
2788; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v14
2789; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
2790; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v15
2791; GFX9-O0-NEXT:    v_add_co_u32_e64 v14, s[6:7], v6, v7
2792; GFX9-O0-NEXT:    v_addc_co_u32_e64 v2, s[6:7], v2, v3, s[6:7]
2793; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
2794; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v2
2795; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v14
2796; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v12
2797; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v15
2798; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v13
2799; GFX9-O0-NEXT:    v_add_co_u32_e64 v2, s[6:7], v2, v7
2800; GFX9-O0-NEXT:    v_addc_co_u32_e64 v6, s[6:7], v3, v6, s[6:7]
2801; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
2802; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v6
2803; GFX9-O0-NEXT:    v_lshlrev_b64 v[5:6], s4, v[4:5]
2804; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
2805; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 killed $vgpr10_vgpr11 killed $exec
2806; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
2807; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s5
2808; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
2809; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v4
2810; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v11
2811; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v7
2812; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
2813; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
2814; GFX9-O0-NEXT:    v_or_b32_e64 v5, v5, v6
2815; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
2816; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
2817; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v2
2818; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
2819; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
2820; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec
2821; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v0
2822; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v1
2823; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v8
2824; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v9
2825; GFX9-O0-NEXT:    v_sub_co_u32_e32 v5, vcc, v5, v7
2826; GFX9-O0-NEXT:    v_subb_co_u32_e32 v0, vcc, v0, v6, vcc
2827; GFX9-O0-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v4, vcc
2828; GFX9-O0-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
2829; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
2830; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
2831; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
2832; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v1
2833; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
2834; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
2835; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
2836; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v0
2837; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v5
2838; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
2839; GFX9-O0-NEXT:    v_lshrrev_b64 v[5:6], s4, v[5:6]
2840; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v5
2841; GFX9-O0-NEXT:    v_lshrrev_b64 v[3:4], s4, v[3:4]
2842; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec
2843; GFX9-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
2844; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
2845; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
2846; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
2847; GFX9-O0-NEXT:    s_setpc_b64 s[30:31]
2848  %div = urem i128 %lhs, %rhs
2849  ret i128 %div
2850}
2851
; Signed i128 remainder by the constant 8589934592 (2^33).
; The optimised checks (GFX9 prefix) expect a straight-line sequence: an
; arithmetic shift of the top dword by 31, a 64-bit logical shift right by 31,
; an add-with-carry chain, an AND of one dword with -2, and a
; subtract-with-borrow chain. The -O0 checks (GFX9-O0 prefix) pin the
; unoptimised register shuffling for the same computation. Neither expected
; sequence contains a branch.
2852define i128 @v_srem_i128_v_pow2k(i128 %lhs) {
2853; GFX9-LABEL: v_srem_i128_v_pow2k:
2854; GFX9:       ; %bb.0:
2855; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2856; GFX9-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
2857; GFX9-NEXT:    v_mov_b32_e32 v5, v4
2858; GFX9-NEXT:    v_lshrrev_b64 v[4:5], 31, v[4:5]
2859; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v0, v4
2860; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v1, v5, vcc
2861; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v2, vcc
2862; GFX9-NEXT:    v_addc_co_u32_e32 v6, vcc, 0, v3, vcc
2863; GFX9-NEXT:    v_and_b32_e32 v4, -2, v4
2864; GFX9-NEXT:    v_subrev_co_u32_e32 v0, vcc, 0, v0
2865; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v4, vcc
2866; GFX9-NEXT:    v_subb_co_u32_e32 v2, vcc, v2, v5, vcc
2867; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v6, vcc
2868; GFX9-NEXT:    s_setpc_b64 s[30:31]
2869;
2870; GFX9-O0-LABEL: v_srem_i128_v_pow2k:
2871; GFX9-O0:       ; %bb.0:
2872; GFX9-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2873; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v2
2874; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v1
2875; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2876; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2877; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
2878; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v3
2879; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2880; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2881; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
2882; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
2883; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
2884; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
2885; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v0
2886; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v1
2887; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v6
2888; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v7
2889; GFX9-O0-NEXT:    s_mov_b32 s4, 63
2890; GFX9-O0-NEXT:    v_ashrrev_i64 v[6:7], s4, v[6:7]
2891; GFX9-O0-NEXT:    s_mov_b32 s4, 31
2892; GFX9-O0-NEXT:    v_lshrrev_b64 v[6:7], s4, v[6:7]
2893; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v6
2894; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v7
2895; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
2896; GFX9-O0-NEXT:    s_mov_b32 s5, s6
2897; GFX9-O0-NEXT:    s_mov_b32 s4, s7
2898; GFX9-O0-NEXT:    v_add_co_u32_e32 v6, vcc, v5, v4
2899; GFX9-O0-NEXT:    v_addc_co_u32_e32 v4, vcc, v0, v2, vcc
2900; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s5
2901; GFX9-O0-NEXT:    v_addc_co_u32_e32 v8, vcc, v3, v2, vcc
2902; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s4
2903; GFX9-O0-NEXT:    v_addc_co_u32_e32 v2, vcc, v1, v2, vcc
2904; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2905; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2906; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
2907; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v4
2908; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v7
2909; GFX9-O0-NEXT:    s_mov_b32 s6, -2
2910; GFX9-O0-NEXT:    s_mov_b32 s4, 0
2911; GFX9-O0-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
2912; GFX9-O0-NEXT:    s_mov_b32 s5, s6
2913; GFX9-O0-NEXT:    s_mov_b32 s6, s5
2914; GFX9-O0-NEXT:    v_and_b32_e64 v4, v4, s6
2915; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
2916; GFX9-O0-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
2917; GFX9-O0-NEXT:    v_and_b32_e64 v9, v6, s4
2918; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
2919; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v4
2920; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v9
2921; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v10
2922; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2923; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2924; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
2925; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v2
2926; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
2927; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v9
2928; GFX9-O0-NEXT:    v_sub_co_u32_e32 v5, vcc, v5, v7
2929; GFX9-O0-NEXT:    v_subb_co_u32_e32 v0, vcc, v0, v6, vcc
2930; GFX9-O0-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v4, vcc
2931; GFX9-O0-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
2932; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2933; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2934; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
2935; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v1
2936; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2937; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2938; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
2939; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v0
2940; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v5
2941; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
2942; GFX9-O0-NEXT:    s_mov_b32 s4, 32
2943; GFX9-O0-NEXT:    v_lshrrev_b64 v[5:6], s4, v[5:6]
2944; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v5
2945; GFX9-O0-NEXT:    v_lshrrev_b64 v[3:4], s4, v[3:4]
2946; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec
2947; GFX9-O0-NEXT:    s_setpc_b64 s[30:31]
; NOTE(review): the value is named %div but holds the srem result (a
; remainder); the name has no effect on the generated code.
2948  %div = srem i128 %lhs, 8589934592
2949  ret i128 %div
2950}
2951
; Unsigned i128 remainder by the constant 8589934592 (2^33).
; The optimised checks (GFX9 prefix) expect the operation to reduce to a mask:
; v1 is ANDed with 1, v2 and v3 are set to 0, and v0 is not written. The -O0
; checks (GFX9-O0 prefix) pin the unoptimised form of the same masking,
; including a spill and reload of v3 through the stack slot at s32.
2952define i128 @v_urem_i128_v_pow2k(i128 %lhs) {
2953; GFX9-LABEL: v_urem_i128_v_pow2k:
2954; GFX9:       ; %bb.0:
2955; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2956; GFX9-NEXT:    v_and_b32_e32 v1, 1, v1
2957; GFX9-NEXT:    v_mov_b32_e32 v2, 0
2958; GFX9-NEXT:    v_mov_b32_e32 v3, 0
2959; GFX9-NEXT:    s_setpc_b64 s[30:31]
2960;
2961; GFX9-O0-LABEL: v_urem_i128_v_pow2k:
2962; GFX9-O0:       ; %bb.0:
2963; GFX9-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2964; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 ; 4-byte Folded Spill
2965; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
2966; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v1
2967; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
2968; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2969; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2970; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
2971; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr1 killed $exec
2972; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2973; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2974; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
2975; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
2976; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
2977; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
2978; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
2979; GFX9-O0-NEXT:    s_mov_b32 s6, 1
2980; GFX9-O0-NEXT:    s_mov_b32 s4, -1
2981; GFX9-O0-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
2982; GFX9-O0-NEXT:    s_mov_b32 s5, s6
2983; GFX9-O0-NEXT:    s_mov_b32 s6, s5
2984; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v1
2985; GFX9-O0-NEXT:    v_and_b32_e64 v3, v2, s6
2986; GFX9-O0-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
2987; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
2988; GFX9-O0-NEXT:    v_and_b32_e64 v1, v0, s4
2989; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
2990; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
2991; GFX9-O0-NEXT:    s_mov_b32 s4, 32
2992; GFX9-O0-NEXT:    v_lshrrev_b64 v[1:2], s4, v[1:2]
2993; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 killed $vgpr1_vgpr2 killed $exec
2994; GFX9-O0-NEXT:    v_mov_b32_e32 v3, 0
2995; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
2996; GFX9-O0-NEXT:    s_setpc_b64 s[30:31]
; NOTE(review): the value is named %div but holds the urem result (a
; remainder); the name has no effect on the generated code.
2997  %div = urem i128 %lhs, 8589934592
2998  ret i128 %div
2999}
3000
3001;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
3002; GFX9-SDAG: {{.*}}
3003; GFX9-SDAG-O0: {{.*}}
3004