xref: /llvm-project/llvm/test/CodeGen/AMDGPU/div_i128.ll (revision 6206f5444fc0732e6495703c75a67f1f90f5b418)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GFX9 %s
3; RUN: llc -O0 -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GFX9-O0 %s
4
5; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GFX9-G %s
6; RUN: llc -O0 -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GFX9-G-O0 %s
7
8define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
9; GFX9-LABEL: v_sdiv_i128_vv:
10; GFX9:       ; %bb.0: ; %_udiv-special-cases
11; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12; GFX9-NEXT:    v_sub_co_u32_e32 v8, vcc, 0, v0
13; GFX9-NEXT:    v_subb_co_u32_e32 v9, vcc, 0, v1, vcc
14; GFX9-NEXT:    v_subb_co_u32_e32 v10, vcc, 0, v2, vcc
15; GFX9-NEXT:    v_subb_co_u32_e32 v11, vcc, 0, v3, vcc
16; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[2:3]
17; GFX9-NEXT:    v_ashrrev_i32_e32 v16, 31, v3
18; GFX9-NEXT:    v_cndmask_b32_e32 v9, v1, v9, vcc
19; GFX9-NEXT:    v_cndmask_b32_e32 v8, v0, v8, vcc
20; GFX9-NEXT:    v_cndmask_b32_e32 v11, v3, v11, vcc
21; GFX9-NEXT:    v_cndmask_b32_e32 v10, v2, v10, vcc
22; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, 0, v4
23; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, 0, v5, vcc
24; GFX9-NEXT:    v_subb_co_u32_e32 v2, vcc, 0, v6, vcc
25; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, 0, v7, vcc
26; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[6:7]
27; GFX9-NEXT:    v_ashrrev_i32_e32 v17, 31, v7
28; GFX9-NEXT:    v_cndmask_b32_e32 v20, v5, v1, vcc
29; GFX9-NEXT:    v_cndmask_b32_e32 v21, v4, v0, vcc
30; GFX9-NEXT:    v_cndmask_b32_e32 v1, v7, v3, vcc
31; GFX9-NEXT:    v_cndmask_b32_e32 v0, v6, v2, vcc
32; GFX9-NEXT:    v_or_b32_e32 v3, v20, v1
33; GFX9-NEXT:    v_or_b32_e32 v2, v21, v0
34; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
35; GFX9-NEXT:    v_or_b32_e32 v3, v9, v11
36; GFX9-NEXT:    v_or_b32_e32 v2, v8, v10
37; GFX9-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[2:3]
38; GFX9-NEXT:    v_ffbh_u32_e32 v2, v0
39; GFX9-NEXT:    v_add_u32_e32 v2, 32, v2
40; GFX9-NEXT:    v_ffbh_u32_e32 v3, v1
41; GFX9-NEXT:    v_min_u32_e32 v2, v2, v3
42; GFX9-NEXT:    v_ffbh_u32_e32 v3, v21
43; GFX9-NEXT:    v_add_u32_e32 v3, 32, v3
44; GFX9-NEXT:    v_ffbh_u32_e32 v4, v20
45; GFX9-NEXT:    v_min_u32_e32 v3, v3, v4
46; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
47; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, 64, v3
48; GFX9-NEXT:    v_addc_co_u32_e64 v4, s[6:7], 0, 0, vcc
49; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
50; GFX9-NEXT:    v_ffbh_u32_e32 v6, v11
51; GFX9-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
52; GFX9-NEXT:    v_ffbh_u32_e32 v3, v10
53; GFX9-NEXT:    v_add_u32_e32 v3, 32, v3
54; GFX9-NEXT:    v_min_u32_e32 v3, v3, v6
55; GFX9-NEXT:    v_ffbh_u32_e32 v6, v8
56; GFX9-NEXT:    v_add_u32_e32 v6, 32, v6
57; GFX9-NEXT:    v_ffbh_u32_e32 v7, v9
58; GFX9-NEXT:    v_min_u32_e32 v6, v6, v7
59; GFX9-NEXT:    v_cndmask_b32_e64 v4, v4, 0, vcc
60; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, 64, v6
61; GFX9-NEXT:    v_addc_co_u32_e64 v7, s[6:7], 0, 0, vcc
62; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
63; GFX9-NEXT:    v_mov_b32_e32 v5, 0
64; GFX9-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
65; GFX9-NEXT:    v_cndmask_b32_e64 v7, v7, 0, vcc
66; GFX9-NEXT:    v_sub_co_u32_e32 v2, vcc, v2, v3
67; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v4, v7, vcc
68; GFX9-NEXT:    v_subbrev_co_u32_e32 v4, vcc, 0, v5, vcc
69; GFX9-NEXT:    v_subbrev_co_u32_e32 v5, vcc, 0, v5, vcc
70; GFX9-NEXT:    s_mov_b64 s[6:7], 0x7f
71; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
72; GFX9-NEXT:    v_mov_b32_e32 v18, v16
73; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
74; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
75; GFX9-NEXT:    v_mov_b32_e32 v19, v17
76; GFX9-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
77; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[4:5]
78; GFX9-NEXT:    v_cndmask_b32_e32 v6, v7, v6, vcc
79; GFX9-NEXT:    v_and_b32_e32 v6, 1, v6
80; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v6
81; GFX9-NEXT:    v_xor_b32_e32 v6, 0x7f, v2
82; GFX9-NEXT:    v_or_b32_e32 v7, v3, v5
83; GFX9-NEXT:    v_or_b32_e32 v6, v6, v4
84; GFX9-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
85; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
86; GFX9-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
87; GFX9-NEXT:    v_cndmask_b32_e64 v13, v11, 0, s[4:5]
88; GFX9-NEXT:    v_cndmask_b32_e64 v12, v10, 0, s[4:5]
89; GFX9-NEXT:    v_cndmask_b32_e64 v7, v9, 0, s[4:5]
90; GFX9-NEXT:    v_cndmask_b32_e64 v6, v8, 0, s[4:5]
91; GFX9-NEXT:    s_and_b64 s[4:5], s[6:7], vcc
92; GFX9-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
93; GFX9-NEXT:    s_cbranch_execz .LBB0_6
94; GFX9-NEXT:  ; %bb.1: ; %udiv-bb1
95; GFX9-NEXT:    v_add_co_u32_e32 v22, vcc, 1, v2
96; GFX9-NEXT:    v_addc_co_u32_e32 v23, vcc, 0, v3, vcc
97; GFX9-NEXT:    v_addc_co_u32_e32 v24, vcc, 0, v4, vcc
98; GFX9-NEXT:    v_sub_u32_e32 v7, 0x7f, v2
99; GFX9-NEXT:    v_addc_co_u32_e32 v25, vcc, 0, v5, vcc
100; GFX9-NEXT:    v_sub_u32_e32 v12, 64, v7
101; GFX9-NEXT:    v_or_b32_e32 v4, v23, v25
102; GFX9-NEXT:    v_or_b32_e32 v3, v22, v24
103; GFX9-NEXT:    v_lshlrev_b64 v[5:6], v7, v[10:11]
104; GFX9-NEXT:    v_lshrrev_b64 v[12:13], v12, v[8:9]
105; GFX9-NEXT:    v_sub_u32_e32 v2, 63, v2
106; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[3:4]
107; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v2, v[8:9]
108; GFX9-NEXT:    v_or_b32_e32 v4, v6, v13
109; GFX9-NEXT:    v_or_b32_e32 v5, v5, v12
110; GFX9-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v7
111; GFX9-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[4:5]
112; GFX9-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[4:5]
113; GFX9-NEXT:    v_lshlrev_b64 v[4:5], v7, v[8:9]
114; GFX9-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v7
115; GFX9-NEXT:    v_mov_b32_e32 v6, 0
116; GFX9-NEXT:    v_mov_b32_e32 v12, 0
117; GFX9-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s[6:7]
118; GFX9-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s[6:7]
119; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, v5, s[4:5]
120; GFX9-NEXT:    v_mov_b32_e32 v7, 0
121; GFX9-NEXT:    v_mov_b32_e32 v13, 0
122; GFX9-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[4:5]
123; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
124; GFX9-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
125; GFX9-NEXT:    s_cbranch_execz .LBB0_5
126; GFX9-NEXT:  ; %bb.2: ; %udiv-preheader
127; GFX9-NEXT:    v_sub_u32_e32 v12, 64, v22
128; GFX9-NEXT:    v_lshrrev_b64 v[6:7], v22, v[8:9]
129; GFX9-NEXT:    v_lshlrev_b64 v[12:13], v12, v[10:11]
130; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v22
131; GFX9-NEXT:    v_or_b32_e32 v12, v6, v12
132; GFX9-NEXT:    v_subrev_u32_e32 v6, 64, v22
133; GFX9-NEXT:    v_or_b32_e32 v13, v7, v13
134; GFX9-NEXT:    v_lshrrev_b64 v[6:7], v6, v[10:11]
135; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v22
136; GFX9-NEXT:    v_cndmask_b32_e32 v7, v7, v13, vcc
137; GFX9-NEXT:    v_cndmask_b32_e64 v9, v7, v9, s[4:5]
138; GFX9-NEXT:    v_cndmask_b32_e32 v12, v6, v12, vcc
139; GFX9-NEXT:    v_lshrrev_b64 v[6:7], v22, v[10:11]
140; GFX9-NEXT:    v_cndmask_b32_e64 v8, v12, v8, s[4:5]
141; GFX9-NEXT:    v_cndmask_b32_e32 v11, 0, v7, vcc
142; GFX9-NEXT:    v_cndmask_b32_e32 v10, 0, v6, vcc
143; GFX9-NEXT:    v_add_co_u32_e32 v26, vcc, -1, v21
144; GFX9-NEXT:    v_addc_co_u32_e32 v27, vcc, -1, v20, vcc
145; GFX9-NEXT:    v_addc_co_u32_e32 v28, vcc, -1, v0, vcc
146; GFX9-NEXT:    v_mov_b32_e32 v14, 0
147; GFX9-NEXT:    v_mov_b32_e32 v12, 0
148; GFX9-NEXT:    v_addc_co_u32_e32 v29, vcc, -1, v1, vcc
149; GFX9-NEXT:    s_mov_b64 s[4:5], 0
150; GFX9-NEXT:    v_mov_b32_e32 v15, 0
151; GFX9-NEXT:    v_mov_b32_e32 v13, 0
152; GFX9-NEXT:    v_mov_b32_e32 v7, 0
153; GFX9-NEXT:  .LBB0_3: ; %udiv-do-while
154; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
155; GFX9-NEXT:    v_lshlrev_b64 v[30:31], 1, v[4:5]
156; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 31, v5
157; GFX9-NEXT:    v_or_b32_e32 v4, v14, v30
158; GFX9-NEXT:    v_lshrrev_b32_e32 v14, 31, v9
159; GFX9-NEXT:    v_lshlrev_b64 v[8:9], 1, v[8:9]
160; GFX9-NEXT:    v_or_b32_e32 v5, v15, v31
161; GFX9-NEXT:    v_lshlrev_b64 v[10:11], 1, v[10:11]
162; GFX9-NEXT:    v_lshrrev_b32_e32 v15, 31, v3
163; GFX9-NEXT:    v_or_b32_e32 v8, v8, v15
164; GFX9-NEXT:    v_or_b32_e32 v10, v10, v14
165; GFX9-NEXT:    v_sub_co_u32_e32 v14, vcc, v26, v8
166; GFX9-NEXT:    v_subb_co_u32_e32 v14, vcc, v27, v9, vcc
167; GFX9-NEXT:    v_subb_co_u32_e32 v14, vcc, v28, v10, vcc
168; GFX9-NEXT:    v_subb_co_u32_e32 v14, vcc, v29, v11, vcc
169; GFX9-NEXT:    v_ashrrev_i32_e32 v30, 31, v14
170; GFX9-NEXT:    v_and_b32_e32 v14, v30, v21
171; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
172; GFX9-NEXT:    v_sub_co_u32_e32 v8, vcc, v8, v14
173; GFX9-NEXT:    v_and_b32_e32 v14, v30, v20
174; GFX9-NEXT:    v_subb_co_u32_e32 v9, vcc, v9, v14, vcc
175; GFX9-NEXT:    v_or3_b32 v2, v2, v6, v12
176; GFX9-NEXT:    v_and_b32_e32 v6, v30, v0
177; GFX9-NEXT:    v_and_b32_e32 v14, v30, v1
178; GFX9-NEXT:    v_subb_co_u32_e32 v10, vcc, v10, v6, vcc
179; GFX9-NEXT:    v_subb_co_u32_e32 v11, vcc, v11, v14, vcc
180; GFX9-NEXT:    v_add_co_u32_e32 v22, vcc, -1, v22
181; GFX9-NEXT:    v_addc_co_u32_e32 v23, vcc, -1, v23, vcc
182; GFX9-NEXT:    v_addc_co_u32_e32 v24, vcc, -1, v24, vcc
183; GFX9-NEXT:    v_addc_co_u32_e32 v25, vcc, -1, v25, vcc
184; GFX9-NEXT:    v_or_b32_e32 v14, v22, v24
185; GFX9-NEXT:    v_or_b32_e32 v15, v23, v25
186; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
187; GFX9-NEXT:    v_and_b32_e32 v6, 1, v30
188; GFX9-NEXT:    v_mov_b32_e32 v15, v7
189; GFX9-NEXT:    v_or3_b32 v3, v3, 0, v13
190; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
191; GFX9-NEXT:    v_mov_b32_e32 v14, v6
192; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
193; GFX9-NEXT:    s_cbranch_execnz .LBB0_3
194; GFX9-NEXT:  ; %bb.4: ; %Flow
195; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
196; GFX9-NEXT:  .LBB0_5: ; %Flow2
197; GFX9-NEXT:    s_or_b64 exec, exec, s[6:7]
198; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 1, v[4:5]
199; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
200; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 31, v5
201; GFX9-NEXT:    v_or3_b32 v13, v3, 0, v13
202; GFX9-NEXT:    v_or3_b32 v12, v2, v4, v12
203; GFX9-NEXT:    v_or_b32_e32 v7, v7, v1
204; GFX9-NEXT:    v_or_b32_e32 v6, v6, v0
205; GFX9-NEXT:  .LBB0_6: ; %Flow3
206; GFX9-NEXT:    s_or_b64 exec, exec, s[8:9]
207; GFX9-NEXT:    v_xor_b32_e32 v2, v17, v16
208; GFX9-NEXT:    v_xor_b32_e32 v3, v19, v18
209; GFX9-NEXT:    v_xor_b32_e32 v0, v6, v2
210; GFX9-NEXT:    v_xor_b32_e32 v1, v7, v3
211; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v2
212; GFX9-NEXT:    v_xor_b32_e32 v5, v12, v2
213; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
214; GFX9-NEXT:    v_xor_b32_e32 v4, v13, v3
215; GFX9-NEXT:    v_subb_co_u32_e32 v2, vcc, v5, v2, vcc
216; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v4, v3, vcc
217; GFX9-NEXT:    s_setpc_b64 s[30:31]
218;
219; GFX9-O0-LABEL: v_sdiv_i128_vv:
220; GFX9-O0:       ; %bb.0: ; %_udiv-special-cases
221; GFX9-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222; GFX9-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
223; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
224; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
225; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
226; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v6
227; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v4
228; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v2
229; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v0
230; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
231; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
232; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
233; GFX9-O0-NEXT:    ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
234; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
235; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v0
236; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
237; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
238; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
239; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v5
240; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
241; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
242; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
243; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v3
244; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
245; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
246; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
247; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
248; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
249; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
250; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
251; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
252; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
253; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
254; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v9
255; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v10
256; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
257; GFX9-O0-NEXT:    ; implicit-def: $vgpr30 : SGPR spill to VGPR lane
258; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 0
259; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 1
260; GFX9-O0-NEXT:    s_mov_b32 s10, s6
261; GFX9-O0-NEXT:    v_writelane_b32 v30, s10, 2
262; GFX9-O0-NEXT:    s_mov_b32 s11, s7
263; GFX9-O0-NEXT:    v_writelane_b32 v30, s11, 3
264; GFX9-O0-NEXT:    v_sub_co_u32_e32 v5, vcc, s10, v1
265; GFX9-O0-NEXT:    v_mov_b32_e32 v0, s11
266; GFX9-O0-NEXT:    v_subb_co_u32_e32 v3, vcc, v0, v2, vcc
267; GFX9-O0-NEXT:    v_mov_b32_e32 v0, s10
268; GFX9-O0-NEXT:    v_subb_co_u32_e32 v4, vcc, v0, v13, vcc
269; GFX9-O0-NEXT:    v_mov_b32_e32 v0, s11
270; GFX9-O0-NEXT:    v_subb_co_u32_e32 v0, vcc, v0, v19, vcc
271; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
272; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
273; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
274; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v3
275; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v6
276; GFX9-O0-NEXT:    s_mov_b64 s[4:5], s[6:7]
277; GFX9-O0-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[9:10], s[4:5]
278; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v2, v3, s[4:5]
279; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
280; GFX9-O0-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
281; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
282; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
283; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v2
284; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v3
285; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
286; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
287; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
288; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v0
289; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v5
290; GFX9-O0-NEXT:    v_cndmask_b32_e64 v1, v19, v0, s[4:5]
291; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v4
292; GFX9-O0-NEXT:    v_cndmask_b32_e64 v0, v13, v0, s[4:5]
293; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
294; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
295; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v0
296; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v1
297; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v7
298; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 killed $vgpr7_vgpr8 killed $exec
299; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v20
300; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v21
301; GFX9-O0-NEXT:    v_sub_co_u32_e32 v17, vcc, s10, v6
302; GFX9-O0-NEXT:    v_mov_b32_e32 v7, s11
303; GFX9-O0-NEXT:    v_subb_co_u32_e32 v9, vcc, v7, v8, vcc
304; GFX9-O0-NEXT:    v_mov_b32_e32 v7, s10
305; GFX9-O0-NEXT:    v_subb_co_u32_e32 v10, vcc, v7, v12, vcc
306; GFX9-O0-NEXT:    v_mov_b32_e32 v7, s11
307; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v7, v14, vcc
308; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
309; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
310; GFX9-O0-NEXT:    ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
311; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v9
312; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v18
313; GFX9-O0-NEXT:    s_mov_b64 s[4:5], s[6:7]
314; GFX9-O0-NEXT:    v_cmp_lt_i64_e64 s[4:5], v[20:21], s[4:5]
315; GFX9-O0-NEXT:    v_cndmask_b32_e64 v9, v8, v9, s[4:5]
316; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v17
317; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v8, s[4:5]
318; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
319; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
320; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v6
321; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v9
322; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
323; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
324; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
325; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v7
326; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v11
327; GFX9-O0-NEXT:    v_cndmask_b32_e64 v8, v14, v7, s[4:5]
328; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v10
329; GFX9-O0-NEXT:    v_cndmask_b32_e64 v7, v12, v7, s[4:5]
330; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
331; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
332; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v7
333; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v8
334; GFX9-O0-NEXT:    v_xor_b32_e64 v14, v14, v19
335; GFX9-O0-NEXT:    v_xor_b32_e64 v12, v12, v13
336; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
337; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v14
338; GFX9-O0-NEXT:    s_mov_b32 s4, 63
339; GFX9-O0-NEXT:    v_ashrrev_i64 v[12:13], s4, v[12:13]
340; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
341; GFX9-O0-NEXT:    s_nop 0
342; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
343; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
344; GFX9-O0-NEXT:    s_nop 0
345; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
346; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v11
347; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v10
348; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
349; GFX9-O0-NEXT:    s_nop 0
350; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
351; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v17
352; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v18
353; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
354; GFX9-O0-NEXT:    s_nop 0
355; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
356; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v5
357; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v4
358; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
359; GFX9-O0-NEXT:    s_nop 0
360; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
361; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v15
362; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v16
363; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
364; GFX9-O0-NEXT:    s_nop 0
365; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
366; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v11
367; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v18
368; GFX9-O0-NEXT:    v_or_b32_e64 v14, v12, v13
369; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v10
370; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v17
371; GFX9-O0-NEXT:    v_or_b32_e64 v12, v12, v13
372; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
373; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v14
374; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[12:13], s[6:7]
375; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v5
376; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v16
377; GFX9-O0-NEXT:    v_or_b32_e64 v14, v12, v13
378; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v4
379; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v15
380; GFX9-O0-NEXT:    v_or_b32_e64 v12, v12, v13
381; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
382; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v14
383; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[12:13], s[6:7]
384; GFX9-O0-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
385; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
386; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[8:9], v[10:11], s[8:9]
387; GFX9-O0-NEXT:    v_ffbh_u32_e64 v7, v7
388; GFX9-O0-NEXT:    s_mov_b32 s13, 32
389; GFX9-O0-NEXT:    v_add_u32_e64 v7, v7, s13
390; GFX9-O0-NEXT:    v_ffbh_u32_e64 v8, v8
391; GFX9-O0-NEXT:    v_min_u32_e64 v7, v7, v8
392; GFX9-O0-NEXT:    s_mov_b32 s12, 0
393; GFX9-O0-NEXT:    ; implicit-def: $sgpr14
394; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s12
395; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
396; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v10
397; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v8
398; GFX9-O0-NEXT:    v_ffbh_u32_e64 v6, v6
399; GFX9-O0-NEXT:    v_add_u32_e64 v6, v6, s13
400; GFX9-O0-NEXT:    v_ffbh_u32_e64 v9, v9
401; GFX9-O0-NEXT:    v_min_u32_e64 v12, v6, v9
402; GFX9-O0-NEXT:    ; implicit-def: $sgpr14
403; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s12
404; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
405; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v6
406; GFX9-O0-NEXT:    s_mov_b64 s[14:15], 64
407; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v12
408; GFX9-O0-NEXT:    s_mov_b32 s16, s14
409; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v13
410; GFX9-O0-NEXT:    s_mov_b32 s18, s15
411; GFX9-O0-NEXT:    v_add_co_u32_e64 v9, s[16:17], v9, s16
412; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s18
413; GFX9-O0-NEXT:    v_addc_co_u32_e64 v6, s[16:17], v6, v10, s[16:17]
414; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
415; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v6
416; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v10
417; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v11, s[8:9]
418; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v7
419; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v9
420; GFX9-O0-NEXT:    v_cndmask_b32_e64 v8, v7, v8, s[8:9]
421; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
422; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
423; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
424; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v6
425; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
426; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[8:9], v[4:5], s[8:9]
427; GFX9-O0-NEXT:    v_ffbh_u32_e64 v4, v0
428; GFX9-O0-NEXT:    v_add_u32_e64 v4, v4, s13
429; GFX9-O0-NEXT:    v_ffbh_u32_e64 v5, v1
430; GFX9-O0-NEXT:    v_min_u32_e64 v5, v4, v5
431; GFX9-O0-NEXT:    ; implicit-def: $sgpr16
432; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s12
433; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
434; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
435; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
436; GFX9-O0-NEXT:    v_ffbh_u32_e64 v4, v2
437; GFX9-O0-NEXT:    v_add_u32_e64 v4, v4, s13
438; GFX9-O0-NEXT:    v_ffbh_u32_e64 v10, v3
439; GFX9-O0-NEXT:    v_min_u32_e64 v11, v4, v10
440; GFX9-O0-NEXT:    ; implicit-def: $sgpr13
441; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s12
442; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
443; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v4
444; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v11
445; GFX9-O0-NEXT:    s_mov_b32 s12, s14
446; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v12
447; GFX9-O0-NEXT:    s_mov_b32 s14, s15
448; GFX9-O0-NEXT:    v_add_co_u32_e64 v10, s[12:13], v10, s12
449; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s14
450; GFX9-O0-NEXT:    v_addc_co_u32_e64 v4, s[12:13], v4, v11, s[12:13]
451; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
452; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v4
453; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v11
454; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[8:9]
455; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
456; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
457; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[8:9]
458; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
459; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
460; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
461; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
462; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
463; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
464; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec
465; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
466; GFX9-O0-NEXT:    v_sub_co_u32_e32 v4, vcc, v4, v7
467; GFX9-O0-NEXT:    v_subb_co_u32_e32 v8, vcc, v5, v6, vcc
468; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s10
469; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s10
470; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v5, v6, vcc
471; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s11
472; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s11
473; GFX9-O0-NEXT:    v_subb_co_u32_e32 v6, vcc, v5, v6, vcc
474; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
475; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
476; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
477; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v8
478; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
479; GFX9-O0-NEXT:    s_nop 0
480; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
481; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
482; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
483; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
484; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v6
485; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
486; GFX9-O0-NEXT:    s_nop 0
487; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
488; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
489; GFX9-O0-NEXT:    s_mov_b64 s[12:13], 0x7f
490; GFX9-O0-NEXT:    v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
491; GFX9-O0-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[14:15]
492; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
493; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[14:15]
494; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v9, s[8:9]
495; GFX9-O0-NEXT:    v_and_b32_e64 v6, 1, v6
496; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[8:9], v6, 1
497; GFX9-O0-NEXT:    s_or_b64 s[8:9], s[4:5], s[8:9]
498; GFX9-O0-NEXT:    s_mov_b64 s[4:5], -1
499; GFX9-O0-NEXT:    s_xor_b64 s[4:5], s[8:9], s[4:5]
500; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
501; GFX9-O0-NEXT:    s_mov_b32 s14, s13
502; GFX9-O0-NEXT:    v_xor_b32_e64 v6, v6, s14
503; GFX9-O0-NEXT:    ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
504; GFX9-O0-NEXT:    v_xor_b32_e64 v4, v4, s12
505; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
506; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v6
507; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
508; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v8
509; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v9
510; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
511; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
512; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
513; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
514; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v6
515; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7]
516; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s11
517; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v1, v4, s[8:9]
518; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s10
519; GFX9-O0-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s[8:9]
520; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
521; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
522; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
523; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v4
524; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s11
525; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v3, v4, s[8:9]
526; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s10
527; GFX9-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[8:9]
528; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
529; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
530; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
531; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v4
532; GFX9-O0-NEXT:    s_and_b64 s[6:7], s[4:5], s[6:7]
533; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
534; GFX9-O0-NEXT:    s_nop 0
535; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
536; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
537; GFX9-O0-NEXT:    s_nop 0
538; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
539; GFX9-O0-NEXT:    s_mov_b64 s[4:5], exec
540; GFX9-O0-NEXT:    v_writelane_b32 v30, s4, 4
541; GFX9-O0-NEXT:    v_writelane_b32 v30, s5, 5
542; GFX9-O0-NEXT:    s_or_saveexec_b64 s[22:23], -1
543; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
544; GFX9-O0-NEXT:    s_mov_b64 exec, s[22:23]
545; GFX9-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
546; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
547; GFX9-O0-NEXT:    s_cbranch_execz .LBB0_3
548; GFX9-O0-NEXT:    s_branch .LBB0_8
549; GFX9-O0-NEXT:  .LBB0_1: ; %Flow
550; GFX9-O0-NEXT:    s_or_saveexec_b64 s[22:23], -1
551; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
552; GFX9-O0-NEXT:    s_mov_b64 exec, s[22:23]
553; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
554; GFX9-O0-NEXT:    v_readlane_b32 s4, v30, 6
555; GFX9-O0-NEXT:    v_readlane_b32 s5, v30, 7
556; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
557; GFX9-O0-NEXT:  ; %bb.2: ; %Flow
558; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
559; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
560; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
561; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
562; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
563; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
564; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
565; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
566; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
567; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
568; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
569; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
570; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
571; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
572; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
573; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
574; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
575; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
576; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
577; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
578; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
579; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
580; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
581; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
582; GFX9-O0-NEXT:    s_branch .LBB0_5
583; GFX9-O0-NEXT:  .LBB0_3: ; %Flow2
584; GFX9-O0-NEXT:    s_or_saveexec_b64 s[22:23], -1
585; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
586; GFX9-O0-NEXT:    s_mov_b64 exec, s[22:23]
587; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
588; GFX9-O0-NEXT:    v_readlane_b32 s4, v30, 4
589; GFX9-O0-NEXT:    v_readlane_b32 s5, v30, 5
590; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
591; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
592; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
593; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
594; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
595; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
596; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
597; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
598; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
599; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
600; GFX9-O0-NEXT:    s_nop 0
601; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
602; GFX9-O0-NEXT:    s_branch .LBB0_9
603; GFX9-O0-NEXT:  .LBB0_4: ; %udiv-loop-exit
604; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
605; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
606; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
607; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
608; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
609; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
610; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
611; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
612; GFX9-O0-NEXT:    s_mov_b32 s4, 1
613; GFX9-O0-NEXT:    s_waitcnt vmcnt(2)
614; GFX9-O0-NEXT:    v_lshlrev_b64 v[2:3], s4, v[0:1]
615; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
616; GFX9-O0-NEXT:    v_lshlrev_b64 v[9:10], s4, v[9:10]
617; GFX9-O0-NEXT:    s_mov_b32 s4, 63
618; GFX9-O0-NEXT:    v_lshrrev_b64 v[0:1], s4, v[0:1]
619; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v1
620; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v10
621; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v8
622; GFX9-O0-NEXT:    v_or3_b32 v4, v4, v11, v12
623; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
624; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v9
625; GFX9-O0-NEXT:    v_or3_b32 v0, v0, v1, v7
626; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
627; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v4
628; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v3
629; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v6
630; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v7
631; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
632; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
633; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
634; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
635; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v4
636; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
637; GFX9-O0-NEXT:    s_nop 0
638; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
639; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
640; GFX9-O0-NEXT:    s_nop 0
641; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
642; GFX9-O0-NEXT:    s_branch .LBB0_3
643; GFX9-O0-NEXT:  .LBB0_5: ; %Flow1
644; GFX9-O0-NEXT:    s_or_saveexec_b64 s[22:23], -1
645; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
646; GFX9-O0-NEXT:    s_mov_b64 exec, s[22:23]
647; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
648; GFX9-O0-NEXT:    v_readlane_b32 s4, v30, 8
649; GFX9-O0-NEXT:    v_readlane_b32 s5, v30, 9
650; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
651; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
652; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
653; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
654; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
655; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
656; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
657; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
658; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
659; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
660; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
661; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
662; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
663; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
664; GFX9-O0-NEXT:    s_nop 0
665; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
666; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
667; GFX9-O0-NEXT:    s_nop 0
668; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
669; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
670; GFX9-O0-NEXT:    s_nop 0
671; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
672; GFX9-O0-NEXT:    s_branch .LBB0_4
673; GFX9-O0-NEXT:  .LBB0_6: ; %udiv-do-while
674; GFX9-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
675; GFX9-O0-NEXT:    s_or_saveexec_b64 s[22:23], -1
676; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
677; GFX9-O0-NEXT:    s_mov_b64 exec, s[22:23]
678; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
679; GFX9-O0-NEXT:    v_readlane_b32 s6, v30, 10
680; GFX9-O0-NEXT:    v_readlane_b32 s7, v30, 11
681; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
682; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
683; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
684; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
685; GFX9-O0-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
686; GFX9-O0-NEXT:    buffer_load_dword v23, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
687; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
688; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
689; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
690; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
691; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
692; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
693; GFX9-O0-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
694; GFX9-O0-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
695; GFX9-O0-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
696; GFX9-O0-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
697; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
698; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
699; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
700; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
701; GFX9-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
702; GFX9-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
703; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
704; GFX9-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
705; GFX9-O0-NEXT:    s_mov_b32 s4, 63
706; GFX9-O0-NEXT:    s_waitcnt vmcnt(16)
707; GFX9-O0-NEXT:    v_lshrrev_b64 v[28:29], s4, v[2:3]
708; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v29
709; GFX9-O0-NEXT:    s_mov_b32 s5, 1
710; GFX9-O0-NEXT:    v_lshlrev_b64 v[22:23], s5, v[22:23]
711; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v23
712; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
713; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v28
714; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v22
715; GFX9-O0-NEXT:    v_or_b32_e64 v22, v5, v10
716; GFX9-O0-NEXT:    ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
717; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v4
718; GFX9-O0-NEXT:    v_lshlrev_b64 v[28:29], s5, v[2:3]
719; GFX9-O0-NEXT:    v_lshrrev_b64 v[4:5], s4, v[6:7]
720; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v29
721; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v5
722; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
723; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v28
724; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
725; GFX9-O0-NEXT:    v_or_b32_e64 v4, v3, v4
726; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
727; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v2
728; GFX9-O0-NEXT:    v_lshlrev_b64 v[2:3], s5, v[0:1]
729; GFX9-O0-NEXT:    v_lshlrev_b64 v[28:29], s5, v[6:7]
730; GFX9-O0-NEXT:    v_lshrrev_b64 v[0:1], s4, v[0:1]
731; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v1
732; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v29
733; GFX9-O0-NEXT:    s_waitcnt vmcnt(10)
734; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v27
735; GFX9-O0-NEXT:    v_or3_b32 v6, v6, v7, v10
736; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
737; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v28
738; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v26
739; GFX9-O0-NEXT:    v_or3_b32 v0, v0, v1, v7
740; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
741; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v6
742; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v3
743; GFX9-O0-NEXT:    s_waitcnt vmcnt(8)
744; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v25
745; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v7
746; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
747; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v24
748; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
749; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
750; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v6
751; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
752; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v5
753; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v22
754; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v23
755; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
756; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v11
757; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v14
758; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v15
759; GFX9-O0-NEXT:    v_sub_co_u32_e32 v13, vcc, v13, v6
760; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
761; GFX9-O0-NEXT:    v_subb_co_u32_e32 v12, vcc, v12, v10, vcc
762; GFX9-O0-NEXT:    v_subb_co_u32_e32 v11, vcc, v11, v4, vcc
763; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v7, v5, vcc
764; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
765; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
766; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
767; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v7
768; GFX9-O0-NEXT:    v_ashrrev_i64 v[13:14], s4, v[11:12]
769; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v14
770; GFX9-O0-NEXT:    s_mov_b64 s[4:5], 1
771; GFX9-O0-NEXT:    s_mov_b32 s8, s5
772; GFX9-O0-NEXT:    v_and_b32_e64 v12, v7, s8
773; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v13
774; GFX9-O0-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
775; GFX9-O0-NEXT:    v_and_b32_e64 v14, v11, s4
776; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
777; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v12
778; GFX9-O0-NEXT:    v_mov_b32_e32 v12, 0
779; GFX9-O0-NEXT:    v_mov_b32_e32 v13, 0
780; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v21
781; GFX9-O0-NEXT:    v_and_b32_e64 v22, v7, v22
782; GFX9-O0-NEXT:    v_and_b32_e64 v20, v11, v20
783; GFX9-O0-NEXT:    ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
784; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v22
785; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v19
786; GFX9-O0-NEXT:    v_and_b32_e64 v7, v7, v22
787; GFX9-O0-NEXT:    v_and_b32_e64 v22, v11, v18
788; GFX9-O0-NEXT:    ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
789; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v7
790; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v22
791; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v23
792; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v20
793; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v21
794; GFX9-O0-NEXT:    v_sub_co_u32_e32 v6, vcc, v6, v19
795; GFX9-O0-NEXT:    v_subb_co_u32_e32 v10, vcc, v10, v18, vcc
796; GFX9-O0-NEXT:    v_subb_co_u32_e32 v4, vcc, v4, v11, vcc
797; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v5, v7, vcc
798; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
799; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
800; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
801; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
802; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
803; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
804; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
805; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v10
806; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v8
807; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 killed $vgpr8_vgpr9 killed $exec
808; GFX9-O0-NEXT:    s_mov_b64 s[8:9], -1
809; GFX9-O0-NEXT:    s_mov_b32 s5, s8
810; GFX9-O0-NEXT:    s_mov_b32 s4, s9
811; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v16
812; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v17
813; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s5
814; GFX9-O0-NEXT:    v_add_co_u32_e32 v19, vcc, v11, v16
815; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s4
816; GFX9-O0-NEXT:    v_addc_co_u32_e32 v9, vcc, v9, v11, vcc
817; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s5
818; GFX9-O0-NEXT:    v_addc_co_u32_e32 v16, vcc, v10, v11, vcc
819; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s4
820; GFX9-O0-NEXT:    v_addc_co_u32_e32 v8, vcc, v8, v10, vcc
821; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
822; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
823; GFX9-O0-NEXT:    ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
824; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v9
825; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
826; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
827; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
828; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v8
829; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v16
830; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v17
831; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v19
832; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v20
833; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v17
834; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v20
835; GFX9-O0-NEXT:    v_or_b32_e64 v18, v18, v21
836; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v16
837; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v19
838; GFX9-O0-NEXT:    v_or_b32_e64 v16, v16, v17
839; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
840; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v18
841; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[16:17], v[12:13]
842; GFX9-O0-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
843; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v3
844; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v2
845; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
846; GFX9-O0-NEXT:    s_nop 0
847; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
848; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v1
849; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v0
850; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
851; GFX9-O0-NEXT:    s_nop 0
852; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
853; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v15
854; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v14
855; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
856; GFX9-O0-NEXT:    s_nop 0
857; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
858; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v13
859; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v12
860; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
861; GFX9-O0-NEXT:    s_nop 0
862; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
863; GFX9-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
864; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 6
865; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 7
866; GFX9-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
867; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 10
868; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 11
869; GFX9-O0-NEXT:    s_or_saveexec_b64 s[22:23], -1
870; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
871; GFX9-O0-NEXT:    s_mov_b64 exec, s[22:23]
872; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
873; GFX9-O0-NEXT:    s_nop 0
874; GFX9-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
875; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
876; GFX9-O0-NEXT:    s_nop 0
877; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
878; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
879; GFX9-O0-NEXT:    s_nop 0
880; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
881; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
882; GFX9-O0-NEXT:    s_nop 0
883; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
884; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
885; GFX9-O0-NEXT:    s_nop 0
886; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
887; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
888; GFX9-O0-NEXT:    s_nop 0
889; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
890; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
891; GFX9-O0-NEXT:    s_nop 0
892; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
893; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
894; GFX9-O0-NEXT:    s_nop 0
895; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
896; GFX9-O0-NEXT:    s_andn2_b64 exec, exec, s[4:5]
897; GFX9-O0-NEXT:    s_cbranch_execnz .LBB0_6
898; GFX9-O0-NEXT:    s_branch .LBB0_1
899; GFX9-O0-NEXT:  .LBB0_7: ; %udiv-preheader
900; GFX9-O0-NEXT:    s_or_saveexec_b64 s[22:23], -1
901; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
902; GFX9-O0-NEXT:    s_mov_b64 exec, s[22:23]
903; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
904; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
905; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
906; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
907; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
908; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
909; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
910; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
911; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
912; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
913; GFX9-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
914; GFX9-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
915; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
916; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
917; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
918; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
919; GFX9-O0-NEXT:    s_waitcnt vmcnt(9)
920; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v10
921; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
922; GFX9-O0-NEXT:    v_lshrrev_b64 v[6:7], v4, v[20:21]
923; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
924; GFX9-O0-NEXT:    s_mov_b32 s6, 64
925; GFX9-O0-NEXT:    v_sub_u32_e64 v12, s6, v4
926; GFX9-O0-NEXT:    v_lshlrev_b64 v[22:23], v12, v[18:19]
927; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v23
928; GFX9-O0-NEXT:    v_or_b32_e64 v5, v5, v12
929; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
930; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v22
931; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v7
932; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
933; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
934; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v7
935; GFX9-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v4, s6
936; GFX9-O0-NEXT:    v_sub_u32_e64 v5, v4, s6
937; GFX9-O0-NEXT:    v_lshrrev_b64 v[22:23], v5, v[18:19]
938; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v23
939; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v12, s[4:5]
940; GFX9-O0-NEXT:    s_mov_b32 s6, 0
941; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v4, s6
942; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v21
943; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v12, s[6:7]
944; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
945; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v22
946; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[4:5]
947; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v20
948; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[6:7]
949; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
950; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
951; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
952; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
953; GFX9-O0-NEXT:    v_lshrrev_b64 v[4:5], v4, v[18:19]
954; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v5
955; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
956; GFX9-O0-NEXT:    s_mov_b32 s8, s7
957; GFX9-O0-NEXT:    v_mov_b32_e32 v12, s8
958; GFX9-O0-NEXT:    v_cndmask_b32_e64 v12, v12, v15, s[4:5]
959; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v4
960; GFX9-O0-NEXT:    s_mov_b32 s8, s6
961; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
962; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[4:5]
963; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
964; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
965; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
966; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v12
967; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v13
968; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v14
969; GFX9-O0-NEXT:    s_mov_b64 s[8:9], -1
970; GFX9-O0-NEXT:    s_mov_b32 s5, s8
971; GFX9-O0-NEXT:    s_mov_b32 s4, s9
972; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v16
973; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v17
974; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s5
975; GFX9-O0-NEXT:    v_add_co_u32_e32 v12, vcc, v12, v16
976; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s4
977; GFX9-O0-NEXT:    v_addc_co_u32_e32 v16, vcc, v15, v16, vcc
978; GFX9-O0-NEXT:    v_mov_b32_e32 v15, s5
979; GFX9-O0-NEXT:    v_addc_co_u32_e32 v14, vcc, v14, v15, vcc
980; GFX9-O0-NEXT:    v_mov_b32_e32 v15, s4
981; GFX9-O0-NEXT:    v_addc_co_u32_e32 v13, vcc, v13, v15, vcc
982; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
983; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
984; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
985; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v13
986; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
987; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
988; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
989; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v16
990; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
991; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
992; GFX9-O0-NEXT:    s_nop 0
993; GFX9-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
994; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
995; GFX9-O0-NEXT:    s_nop 0
996; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
997; GFX9-O0-NEXT:    s_mov_b64 s[4:5], s[6:7]
998; GFX9-O0-NEXT:    v_mov_b32_e32 v15, s9
999; GFX9-O0-NEXT:    v_mov_b32_e32 v14, s8
1000; GFX9-O0-NEXT:    v_mov_b32_e32 v13, s7
1001; GFX9-O0-NEXT:    v_mov_b32_e32 v12, s6
1002; GFX9-O0-NEXT:    v_writelane_b32 v30, s4, 10
1003; GFX9-O0-NEXT:    v_writelane_b32 v30, s5, 11
1004; GFX9-O0-NEXT:    s_or_saveexec_b64 s[22:23], -1
1005; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
1006; GFX9-O0-NEXT:    s_mov_b64 exec, s[22:23]
1007; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
1008; GFX9-O0-NEXT:    s_nop 0
1009; GFX9-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
1010; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
1011; GFX9-O0-NEXT:    s_nop 0
1012; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
1013; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
1014; GFX9-O0-NEXT:    s_nop 0
1015; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
1016; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
1017; GFX9-O0-NEXT:    s_nop 0
1018; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
1019; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
1020; GFX9-O0-NEXT:    s_nop 0
1021; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
1022; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
1023; GFX9-O0-NEXT:    s_nop 0
1024; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
1025; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
1026; GFX9-O0-NEXT:    s_nop 0
1027; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
1028; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
1029; GFX9-O0-NEXT:    s_nop 0
1030; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
1031; GFX9-O0-NEXT:    s_branch .LBB0_6
1032; GFX9-O0-NEXT:  .LBB0_8: ; %udiv-bb1
1033; GFX9-O0-NEXT:    s_or_saveexec_b64 s[22:23], -1
1034; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
1035; GFX9-O0-NEXT:    s_mov_b64 exec, s[22:23]
1036; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
1037; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1038; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1039; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1040; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1041; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1042; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1043; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1044; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 1
1045; GFX9-O0-NEXT:    s_mov_b32 s5, s6
1046; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
1047; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v0
1048; GFX9-O0-NEXT:    s_mov_b32 s4, s7
1049; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
1050; GFX9-O0-NEXT:    s_mov_b32 s8, s6
1051; GFX9-O0-NEXT:    s_mov_b32 s9, s7
1052; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v4
1053; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
1054; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s5
1055; GFX9-O0-NEXT:    v_add_co_u32_e32 v8, vcc, v3, v4
1056; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s4
1057; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
1058; GFX9-O0-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v4, vcc
1059; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
1060; GFX9-O0-NEXT:    v_addc_co_u32_e32 v0, vcc, v0, v4, vcc
1061; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s9
1062; GFX9-O0-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v4, vcc
1063; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1064; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1065; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
1066; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v1
1067; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1068; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1069; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
1070; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
1071; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v1
1072; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v0
1073; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
1074; GFX9-O0-NEXT:    s_nop 0
1075; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
1076; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
1077; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
1078; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
1079; GFX9-O0-NEXT:    s_nop 0
1080; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
1081; GFX9-O0-NEXT:    s_mov_b32 s4, 0x7f
1082; GFX9-O0-NEXT:    v_sub_u32_e64 v2, s4, v3
1083; GFX9-O0-NEXT:    v_lshlrev_b64 v[4:5], v2, v[10:11]
1084; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v5
1085; GFX9-O0-NEXT:    s_mov_b32 s4, 64
1086; GFX9-O0-NEXT:    v_sub_u32_e64 v13, s4, v2
1087; GFX9-O0-NEXT:    v_lshrrev_b64 v[13:14], v13, v[6:7]
1088; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v14
1089; GFX9-O0-NEXT:    v_or_b32_e64 v12, v12, v15
1090; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
1091; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v13
1092; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
1093; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
1094; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v12
1095; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v5
1096; GFX9-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v2, s4
1097; GFX9-O0-NEXT:    s_mov_b32 s10, 63
1098; GFX9-O0-NEXT:    v_sub_u32_e64 v3, s10, v3
1099; GFX9-O0-NEXT:    v_lshlrev_b64 v[12:13], v3, v[6:7]
1100; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v13
1101; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v14, s[4:5]
1102; GFX9-O0-NEXT:    s_mov_b32 s10, 0
1103; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[10:11], v2, s10
1104; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v11
1105; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v14, s[10:11]
1106; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v4
1107; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v12
1108; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[4:5]
1109; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
1110; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[10:11]
1111; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
1112; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
1113; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
1114; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v3
1115; GFX9-O0-NEXT:    v_lshlrev_b64 v[6:7], v2, v[6:7]
1116; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v7
1117; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s9
1118; GFX9-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[4:5]
1119; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
1120; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s8
1121; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v3, v6, s[4:5]
1122; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1123; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1124; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
1125; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v2
1126; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
1127; GFX9-O0-NEXT:    s_nop 0
1128; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
1129; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
1130; GFX9-O0-NEXT:    s_nop 0
1131; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
1132; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
1133; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v9
1134; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
1135; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
1136; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v8
1137; GFX9-O0-NEXT:    v_or_b32_e64 v0, v0, v1
1138; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
1139; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
1140; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[4:5], v[0:1], s[6:7]
1141; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
1142; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s8
1143; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s9
1144; GFX9-O0-NEXT:    v_mov_b32_e32 v0, s6
1145; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s7
1146; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
1147; GFX9-O0-NEXT:    s_nop 0
1148; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
1149; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
1150; GFX9-O0-NEXT:    s_nop 0
1151; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
1152; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
1153; GFX9-O0-NEXT:    s_nop 0
1154; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
1155; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
1156; GFX9-O0-NEXT:    s_nop 0
1157; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
1158; GFX9-O0-NEXT:    s_mov_b64 s[6:7], exec
1159; GFX9-O0-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
1160; GFX9-O0-NEXT:    s_xor_b64 s[6:7], s[4:5], s[6:7]
1161; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 8
1162; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 9
1163; GFX9-O0-NEXT:    s_or_saveexec_b64 s[22:23], -1
1164; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
1165; GFX9-O0-NEXT:    s_mov_b64 exec, s[22:23]
1166; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
1167; GFX9-O0-NEXT:    s_cbranch_execz .LBB0_5
1168; GFX9-O0-NEXT:    s_branch .LBB0_7
1169; GFX9-O0-NEXT:  .LBB0_9: ; %udiv-end
1170; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
1171; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
1172; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
1173; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
1174; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
1175; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
1176; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
1177; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
1178; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
1179; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v8
1180; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v6
1181; GFX9-O0-NEXT:    v_xor_b32_e64 v3, v3, v2
1182; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v7
1183; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
1184; GFX9-O0-NEXT:    v_xor_b32_e64 v8, v5, v4
1185; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
1186; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v3
1187; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v11
1188; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
1189; GFX9-O0-NEXT:    v_xor_b32_e64 v3, v3, v6
1190; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v10
1191; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
1192; GFX9-O0-NEXT:    v_xor_b32_e64 v0, v0, v7
1193; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
1194; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v3
1195; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v0
1196; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v1
1197; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v8
1198; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v9
1199; GFX9-O0-NEXT:    v_sub_co_u32_e32 v5, vcc, v5, v7
1200; GFX9-O0-NEXT:    v_subb_co_u32_e32 v0, vcc, v0, v6, vcc
1201; GFX9-O0-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v4, vcc
1202; GFX9-O0-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
1203; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1204; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1205; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
1206; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v1
1207; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1208; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
1209; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
1210; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v0
1211; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v5
1212; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
1213; GFX9-O0-NEXT:    s_mov_b32 s4, 32
1214; GFX9-O0-NEXT:    v_lshrrev_b64 v[5:6], s4, v[5:6]
1215; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v5
1216; GFX9-O0-NEXT:    v_lshrrev_b64 v[3:4], s4, v[3:4]
1217; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec
1218; GFX9-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
1219; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
1220; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
1221; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
1222; GFX9-O0-NEXT:    s_setpc_b64 s[30:31]
1223;
1224; GFX9-G-LABEL: v_sdiv_i128_vv:
1225; GFX9-G:       ; %bb.0: ; %_udiv-special-cases
1226; GFX9-G-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1227; GFX9-G-NEXT:    v_ashrrev_i32_e32 v16, 31, v3
1228; GFX9-G-NEXT:    v_xor_b32_e32 v0, v16, v0
1229; GFX9-G-NEXT:    v_xor_b32_e32 v1, v16, v1
1230; GFX9-G-NEXT:    v_sub_co_u32_e32 v8, vcc, v0, v16
1231; GFX9-G-NEXT:    v_xor_b32_e32 v2, v16, v2
1232; GFX9-G-NEXT:    v_subb_co_u32_e32 v9, vcc, v1, v16, vcc
1233; GFX9-G-NEXT:    v_ashrrev_i32_e32 v17, 31, v7
1234; GFX9-G-NEXT:    v_xor_b32_e32 v3, v16, v3
1235; GFX9-G-NEXT:    v_subb_co_u32_e32 v10, vcc, v2, v16, vcc
1236; GFX9-G-NEXT:    v_subb_co_u32_e32 v11, vcc, v3, v16, vcc
1237; GFX9-G-NEXT:    v_xor_b32_e32 v0, v17, v4
1238; GFX9-G-NEXT:    v_xor_b32_e32 v1, v17, v5
1239; GFX9-G-NEXT:    v_sub_co_u32_e32 v18, vcc, v0, v17
1240; GFX9-G-NEXT:    v_xor_b32_e32 v2, v17, v6
1241; GFX9-G-NEXT:    v_subb_co_u32_e32 v19, vcc, v1, v17, vcc
1242; GFX9-G-NEXT:    v_xor_b32_e32 v3, v17, v7
1243; GFX9-G-NEXT:    v_subb_co_u32_e32 v4, vcc, v2, v17, vcc
1244; GFX9-G-NEXT:    v_subb_co_u32_e32 v5, vcc, v3, v17, vcc
1245; GFX9-G-NEXT:    v_or_b32_e32 v0, v18, v4
1246; GFX9-G-NEXT:    v_or_b32_e32 v1, v19, v5
1247; GFX9-G-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
1248; GFX9-G-NEXT:    v_or_b32_e32 v0, v8, v10
1249; GFX9-G-NEXT:    v_or_b32_e32 v1, v9, v11
1250; GFX9-G-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
1251; GFX9-G-NEXT:    v_ffbh_u32_e32 v1, v18
1252; GFX9-G-NEXT:    v_ffbh_u32_e32 v0, v19
1253; GFX9-G-NEXT:    v_add_u32_e32 v1, 32, v1
1254; GFX9-G-NEXT:    v_ffbh_u32_e32 v2, v4
1255; GFX9-G-NEXT:    v_min_u32_e32 v0, v0, v1
1256; GFX9-G-NEXT:    v_ffbh_u32_e32 v1, v5
1257; GFX9-G-NEXT:    v_add_u32_e32 v2, 32, v2
1258; GFX9-G-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[4:5]
1259; GFX9-G-NEXT:    v_add_u32_e32 v0, 64, v0
1260; GFX9-G-NEXT:    v_min_u32_e32 v1, v1, v2
1261; GFX9-G-NEXT:    v_ffbh_u32_e32 v2, v8
1262; GFX9-G-NEXT:    v_cndmask_b32_e64 v0, v1, v0, s[6:7]
1263; GFX9-G-NEXT:    v_ffbh_u32_e32 v1, v9
1264; GFX9-G-NEXT:    v_add_u32_e32 v2, 32, v2
1265; GFX9-G-NEXT:    v_ffbh_u32_e32 v3, v10
1266; GFX9-G-NEXT:    v_min_u32_e32 v1, v1, v2
1267; GFX9-G-NEXT:    v_ffbh_u32_e32 v2, v11
1268; GFX9-G-NEXT:    v_add_u32_e32 v3, 32, v3
1269; GFX9-G-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[10:11]
1270; GFX9-G-NEXT:    v_add_u32_e32 v1, 64, v1
1271; GFX9-G-NEXT:    v_min_u32_e32 v2, v2, v3
1272; GFX9-G-NEXT:    v_cndmask_b32_e64 v1, v2, v1, s[6:7]
1273; GFX9-G-NEXT:    v_sub_co_u32_e64 v0, s[6:7], v0, v1
1274; GFX9-G-NEXT:    v_subb_co_u32_e64 v1, s[6:7], 0, 0, s[6:7]
1275; GFX9-G-NEXT:    v_mov_b32_e32 v6, 0x7f
1276; GFX9-G-NEXT:    v_subb_co_u32_e64 v2, s[6:7], 0, 0, s[6:7]
1277; GFX9-G-NEXT:    v_mov_b32_e32 v7, 0
1278; GFX9-G-NEXT:    v_subb_co_u32_e64 v3, s[6:7], 0, 0, s[6:7]
1279; GFX9-G-NEXT:    v_cmp_gt_u64_e64 s[6:7], v[0:1], v[6:7]
1280; GFX9-G-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1281; GFX9-G-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[6:7]
1282; GFX9-G-NEXT:    v_cmp_lt_u64_e64 s[6:7], 0, v[2:3]
1283; GFX9-G-NEXT:    v_or_b32_e32 v15, v1, v3
1284; GFX9-G-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[6:7]
1285; GFX9-G-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[2:3]
1286; GFX9-G-NEXT:    s_mov_b64 s[8:9], 0
1287; GFX9-G-NEXT:    v_cndmask_b32_e64 v6, v7, v6, s[6:7]
1288; GFX9-G-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
1289; GFX9-G-NEXT:    v_or_b32_e32 v20, v7, v6
1290; GFX9-G-NEXT:    v_xor_b32_e32 v6, 0x7f, v0
1291; GFX9-G-NEXT:    v_or_b32_e32 v14, v6, v2
1292; GFX9-G-NEXT:    v_and_b32_e32 v6, 1, v20
1293; GFX9-G-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
1294; GFX9-G-NEXT:    v_cndmask_b32_e64 v6, v8, 0, vcc
1295; GFX9-G-NEXT:    v_cndmask_b32_e64 v7, v9, 0, vcc
1296; GFX9-G-NEXT:    v_cndmask_b32_e64 v12, v10, 0, vcc
1297; GFX9-G-NEXT:    v_cndmask_b32_e64 v13, v11, 0, vcc
1298; GFX9-G-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
1299; GFX9-G-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
1300; GFX9-G-NEXT:    v_or_b32_e32 v14, v20, v14
1301; GFX9-G-NEXT:    v_and_b32_e32 v14, 1, v14
1302; GFX9-G-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v14
1303; GFX9-G-NEXT:    s_xor_b64 s[4:5], vcc, -1
1304; GFX9-G-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
1305; GFX9-G-NEXT:    s_cbranch_execz .LBB0_6
1306; GFX9-G-NEXT:  ; %bb.1: ; %udiv-bb1
1307; GFX9-G-NEXT:    v_add_co_u32_e32 v20, vcc, 1, v0
1308; GFX9-G-NEXT:    v_addc_co_u32_e32 v21, vcc, 0, v1, vcc
1309; GFX9-G-NEXT:    v_addc_co_u32_e32 v22, vcc, 0, v2, vcc
1310; GFX9-G-NEXT:    v_addc_co_u32_e32 v23, vcc, 0, v3, vcc
1311; GFX9-G-NEXT:    s_xor_b64 s[4:5], vcc, -1
1312; GFX9-G-NEXT:    v_sub_co_u32_e32 v12, vcc, 0x7f, v0
1313; GFX9-G-NEXT:    v_sub_u32_e32 v0, 64, v12
1314; GFX9-G-NEXT:    v_lshrrev_b64 v[0:1], v0, v[8:9]
1315; GFX9-G-NEXT:    v_lshlrev_b64 v[2:3], v12, v[10:11]
1316; GFX9-G-NEXT:    v_add_u32_e32 v13, 0xffffffc0, v12
1317; GFX9-G-NEXT:    v_lshlrev_b64 v[6:7], v12, v[8:9]
1318; GFX9-G-NEXT:    v_or_b32_e32 v2, v0, v2
1319; GFX9-G-NEXT:    v_or_b32_e32 v3, v1, v3
1320; GFX9-G-NEXT:    v_lshlrev_b64 v[0:1], v13, v[8:9]
1321; GFX9-G-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v12
1322; GFX9-G-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc
1323; GFX9-G-NEXT:    v_cndmask_b32_e32 v7, 0, v7, vcc
1324; GFX9-G-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1325; GFX9-G-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1326; GFX9-G-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v12
1327; GFX9-G-NEXT:    v_cndmask_b32_e32 v12, v0, v10, vcc
1328; GFX9-G-NEXT:    v_cndmask_b32_e32 v13, v1, v11, vcc
1329; GFX9-G-NEXT:    s_mov_b64 s[10:11], s[8:9]
1330; GFX9-G-NEXT:    v_mov_b32_e32 v0, s8
1331; GFX9-G-NEXT:    v_mov_b32_e32 v1, s9
1332; GFX9-G-NEXT:    v_mov_b32_e32 v2, s10
1333; GFX9-G-NEXT:    v_mov_b32_e32 v3, s11
1334; GFX9-G-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
1335; GFX9-G-NEXT:    s_xor_b64 s[12:13], exec, s[8:9]
1336; GFX9-G-NEXT:    s_cbranch_execz .LBB0_5
1337; GFX9-G-NEXT:  ; %bb.2: ; %udiv-preheader
1338; GFX9-G-NEXT:    v_sub_u32_e32 v2, 64, v20
1339; GFX9-G-NEXT:    v_lshrrev_b64 v[0:1], v20, v[8:9]
1340; GFX9-G-NEXT:    v_lshlrev_b64 v[2:3], v2, v[10:11]
1341; GFX9-G-NEXT:    v_add_u32_e32 v24, 0xffffffc0, v20
1342; GFX9-G-NEXT:    v_lshrrev_b64 v[14:15], v20, v[10:11]
1343; GFX9-G-NEXT:    v_or_b32_e32 v2, v0, v2
1344; GFX9-G-NEXT:    v_or_b32_e32 v3, v1, v3
1345; GFX9-G-NEXT:    v_lshrrev_b64 v[0:1], v24, v[10:11]
1346; GFX9-G-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v20
1347; GFX9-G-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1348; GFX9-G-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1349; GFX9-G-NEXT:    v_cndmask_b32_e32 v14, 0, v14, vcc
1350; GFX9-G-NEXT:    v_cndmask_b32_e32 v15, 0, v15, vcc
1351; GFX9-G-NEXT:    v_add_co_u32_e32 v24, vcc, -1, v18
1352; GFX9-G-NEXT:    s_mov_b64 s[8:9], 0
1353; GFX9-G-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v20
1354; GFX9-G-NEXT:    v_addc_co_u32_e32 v25, vcc, -1, v19, vcc
1355; GFX9-G-NEXT:    v_cndmask_b32_e64 v10, v0, v8, s[4:5]
1356; GFX9-G-NEXT:    v_cndmask_b32_e64 v11, v1, v9, s[4:5]
1357; GFX9-G-NEXT:    v_addc_co_u32_e32 v26, vcc, -1, v4, vcc
1358; GFX9-G-NEXT:    s_mov_b64 s[10:11], s[8:9]
1359; GFX9-G-NEXT:    v_mov_b32_e32 v0, s8
1360; GFX9-G-NEXT:    v_addc_co_u32_e32 v27, vcc, -1, v5, vcc
1361; GFX9-G-NEXT:    v_mov_b32_e32 v9, 0
1362; GFX9-G-NEXT:    v_mov_b32_e32 v1, s9
1363; GFX9-G-NEXT:    v_mov_b32_e32 v2, s10
1364; GFX9-G-NEXT:    v_mov_b32_e32 v3, s11
1365; GFX9-G-NEXT:  .LBB0_3: ; %udiv-do-while
1366; GFX9-G-NEXT:    ; =>This Inner Loop Header: Depth=1
1367; GFX9-G-NEXT:    v_lshlrev_b64 v[2:3], 1, v[6:7]
1368; GFX9-G-NEXT:    v_lshrrev_b32_e32 v8, 31, v7
1369; GFX9-G-NEXT:    v_or_b32_e32 v6, v0, v2
1370; GFX9-G-NEXT:    v_or_b32_e32 v7, v1, v3
1371; GFX9-G-NEXT:    v_lshlrev_b64 v[2:3], 1, v[10:11]
1372; GFX9-G-NEXT:    v_lshrrev_b32_e32 v10, 31, v13
1373; GFX9-G-NEXT:    v_lshlrev_b64 v[0:1], 1, v[14:15]
1374; GFX9-G-NEXT:    v_or_b32_e32 v2, v2, v10
1375; GFX9-G-NEXT:    v_lshrrev_b32_e32 v14, 31, v11
1376; GFX9-G-NEXT:    v_sub_co_u32_e32 v10, vcc, v24, v2
1377; GFX9-G-NEXT:    v_or_b32_e32 v0, v0, v14
1378; GFX9-G-NEXT:    v_subb_co_u32_e32 v10, vcc, v25, v3, vcc
1379; GFX9-G-NEXT:    v_subb_co_u32_e32 v10, vcc, v26, v0, vcc
1380; GFX9-G-NEXT:    v_subb_co_u32_e32 v10, vcc, v27, v1, vcc
1381; GFX9-G-NEXT:    v_ashrrev_i32_e32 v28, 31, v10
1382; GFX9-G-NEXT:    v_and_b32_e32 v10, v28, v18
1383; GFX9-G-NEXT:    v_and_b32_e32 v11, v28, v19
1384; GFX9-G-NEXT:    v_sub_co_u32_e32 v10, vcc, v2, v10
1385; GFX9-G-NEXT:    v_subb_co_u32_e32 v11, vcc, v3, v11, vcc
1386; GFX9-G-NEXT:    v_and_b32_e32 v2, v28, v4
1387; GFX9-G-NEXT:    v_and_b32_e32 v3, v28, v5
1388; GFX9-G-NEXT:    v_subb_co_u32_e32 v14, vcc, v0, v2, vcc
1389; GFX9-G-NEXT:    v_subb_co_u32_e32 v15, vcc, v1, v3, vcc
1390; GFX9-G-NEXT:    v_add_co_u32_e32 v20, vcc, -1, v20
1391; GFX9-G-NEXT:    v_addc_co_u32_e32 v21, vcc, -1, v21, vcc
1392; GFX9-G-NEXT:    v_addc_co_u32_e32 v22, vcc, -1, v22, vcc
1393; GFX9-G-NEXT:    v_addc_co_u32_e32 v23, vcc, -1, v23, vcc
1394; GFX9-G-NEXT:    v_lshlrev_b64 v[12:13], 1, v[12:13]
1395; GFX9-G-NEXT:    v_or_b32_e32 v0, v20, v22
1396; GFX9-G-NEXT:    v_or_b32_e32 v1, v21, v23
1397; GFX9-G-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
1398; GFX9-G-NEXT:    v_or_b32_e32 v12, v12, v8
1399; GFX9-G-NEXT:    v_and_b32_e32 v8, 1, v28
1400; GFX9-G-NEXT:    v_mov_b32_e32 v0, v8
1401; GFX9-G-NEXT:    s_or_b64 s[8:9], vcc, s[8:9]
1402; GFX9-G-NEXT:    v_mov_b32_e32 v1, v9
1403; GFX9-G-NEXT:    s_andn2_b64 exec, exec, s[8:9]
1404; GFX9-G-NEXT:    s_cbranch_execnz .LBB0_3
1405; GFX9-G-NEXT:  ; %bb.4: ; %Flow
1406; GFX9-G-NEXT:    s_or_b64 exec, exec, s[8:9]
1407; GFX9-G-NEXT:  .LBB0_5: ; %Flow2
1408; GFX9-G-NEXT:    s_or_b64 exec, exec, s[12:13]
1409; GFX9-G-NEXT:    v_lshlrev_b64 v[2:3], 1, v[6:7]
1410; GFX9-G-NEXT:    v_lshlrev_b64 v[12:13], 1, v[12:13]
1411; GFX9-G-NEXT:    v_lshrrev_b32_e32 v4, 31, v7
1412; GFX9-G-NEXT:    v_or_b32_e32 v12, v12, v4
1413; GFX9-G-NEXT:    v_or_b32_e32 v6, v0, v2
1414; GFX9-G-NEXT:    v_or_b32_e32 v7, v1, v3
1415; GFX9-G-NEXT:  .LBB0_6: ; %Flow3
1416; GFX9-G-NEXT:    s_or_b64 exec, exec, s[6:7]
1417; GFX9-G-NEXT:    v_xor_b32_e32 v3, v17, v16
1418; GFX9-G-NEXT:    v_xor_b32_e32 v0, v6, v3
1419; GFX9-G-NEXT:    v_xor_b32_e32 v1, v7, v3
1420; GFX9-G-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
1421; GFX9-G-NEXT:    v_xor_b32_e32 v2, v12, v3
1422; GFX9-G-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
1423; GFX9-G-NEXT:    v_xor_b32_e32 v4, v13, v3
1424; GFX9-G-NEXT:    v_subb_co_u32_e32 v2, vcc, v2, v3, vcc
1425; GFX9-G-NEXT:    v_subb_co_u32_e32 v3, vcc, v4, v3, vcc
1426; GFX9-G-NEXT:    s_setpc_b64 s[30:31]
1427;
1428; GFX9-G-O0-LABEL: v_sdiv_i128_vv:
1429; GFX9-G-O0:       ; %bb.0: ; %_udiv-special-cases
1430; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1431; GFX9-G-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
1432; GFX9-G-O0-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
1433; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[4:5]
1434; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v1
1435; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v2
1436; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v3
1437; GFX9-G-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
1438; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v10
1439; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v9
1440; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v8
1441; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
1442; GFX9-G-O0-NEXT:    s_nop 0
1443; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
1444; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
1445; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
1446; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v4
1447; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v5
1448; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v6
1449; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
1450; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
1451; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
1452; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
1453; GFX9-G-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13_vgpr14_vgpr15 killed $exec
1454; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v1
1455; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v0
1456; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v7
1457; GFX9-G-O0-NEXT:    s_mov_b64 s[4:5], 0
1458; GFX9-G-O0-NEXT:    s_mov_b64 s[8:9], 0x7f
1459; GFX9-G-O0-NEXT:    ; kill: def $vgpr0_vgpr1 killed $vgpr3_vgpr4 killed $exec
1460; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
1461; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v5
1462; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v6
1463; GFX9-G-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr0 killed $exec
1464; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v1
1465; GFX9-G-O0-NEXT:    s_mov_b32 s6, 31
1466; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s6
1467; GFX9-G-O0-NEXT:    v_ashrrev_i32_e64 v11, v2, v7
1468; GFX9-G-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr0 killed $exec
1469; GFX9-G-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec
1470; GFX9-G-O0-NEXT:    s_mov_b32 s6, 31
1471; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s6
1472; GFX9-G-O0-NEXT:    v_ashrrev_i32_e64 v9, v0, v1
1473; GFX9-G-O0-NEXT:    ; kill: def $vgpr0_vgpr1 killed $vgpr12_vgpr13 killed $exec
1474; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v14
1475; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v15
1476; GFX9-G-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr0 killed $exec
1477; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v1
1478; GFX9-G-O0-NEXT:    s_mov_b32 s6, 31
1479; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s6
1480; GFX9-G-O0-NEXT:    v_ashrrev_i32_e64 v10, v2, v7
1481; GFX9-G-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr0 killed $exec
1482; GFX9-G-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec
1483; GFX9-G-O0-NEXT:    s_mov_b32 s6, 31
1484; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s6
1485; GFX9-G-O0-NEXT:    v_ashrrev_i32_e64 v8, v0, v1
1486; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v3
1487; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v4
1488; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v5
1489; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v6
1490; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v1
1491; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v2
1492; GFX9-G-O0-NEXT:    v_xor_b32_e64 v0, v11, v0
1493; GFX9-G-O0-NEXT:    v_xor_b32_e64 v1, v11, v1
1494; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v4
1495; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v5
1496; GFX9-G-O0-NEXT:    v_xor_b32_e64 v3, v9, v3
1497; GFX9-G-O0-NEXT:    v_xor_b32_e64 v2, v9, v2
1498; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v0, s[6:7], v0, v11
1499; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
1500; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v1, s[6:7], v1, v11, s[6:7]
1501; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
1502; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v5, s[6:7], v3, v9, s[6:7]
1503; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
1504; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v2, s[6:7], v2, v9, s[6:7]
1505; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
1506; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v12
1507; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v13
1508; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v14
1509; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v15
1510; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v6
1511; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v7
1512; GFX9-G-O0-NEXT:    v_xor_b32_e64 v7, v10, v4
1513; GFX9-G-O0-NEXT:    v_xor_b32_e64 v4, v10, v3
1514; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v12
1515; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v13
1516; GFX9-G-O0-NEXT:    v_xor_b32_e64 v6, v8, v6
1517; GFX9-G-O0-NEXT:    v_xor_b32_e64 v3, v8, v3
1518; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v7, s[6:7], v7, v10
1519; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
1520; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v4, s[6:7], v4, v10, s[6:7]
1521; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
1522; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v6, s[6:7], v6, v8, s[6:7]
1523; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
1524; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v3, s[6:7], v3, v8, s[6:7]
1525; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
1526; GFX9-G-O0-NEXT:    v_xor_b32_e64 v12, v10, v11
1527; GFX9-G-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
1528; GFX9-G-O0-NEXT:    v_xor_b32_e64 v10, v10, v11
1529; GFX9-G-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
1530; GFX9-G-O0-NEXT:    v_xor_b32_e64 v10, v8, v9
1531; GFX9-G-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
1532; GFX9-G-O0-NEXT:    v_xor_b32_e64 v8, v8, v9
1533; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
1534; GFX9-G-O0-NEXT:    v_or_b32_e64 v8, v7, v6
1535; GFX9-G-O0-NEXT:    v_or_b32_e64 v10, v4, v3
1536; GFX9-G-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
1537; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v10
1538; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s5
1539; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, s4
1540; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[6:7], v[8:9], v[10:11]
1541; GFX9-G-O0-NEXT:    v_or_b32_e64 v8, v0, v5
1542; GFX9-G-O0-NEXT:    v_or_b32_e64 v10, v1, v2
1543; GFX9-G-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
1544; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v10
1545; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s5
1546; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, s4
1547; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[10:11], v[8:9], v[10:11]
1548; GFX9-G-O0-NEXT:    s_or_b64 s[6:7], s[6:7], s[10:11]
1549; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v6
1550; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v3
1551; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s5
1552; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, s4
1553; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[10:11], v[8:9], v[10:11]
1554; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v4, v4
1555; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v7, v7
1556; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, 32
1557; GFX9-G-O0-NEXT:    v_add_u32_e64 v7, v7, v8
1558; GFX9-G-O0-NEXT:    v_min_u32_e64 v4, v4, v7
1559; GFX9-G-O0-NEXT:    s_mov_b32 s12, 64
1560; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s12
1561; GFX9-G-O0-NEXT:    v_add_u32_e64 v4, v4, v7
1562; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v3, v3
1563; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v6, v6
1564; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, 32
1565; GFX9-G-O0-NEXT:    v_add_u32_e64 v6, v6, v7
1566; GFX9-G-O0-NEXT:    v_min_u32_e64 v3, v3, v6
1567; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[10:11]
1568; GFX9-G-O0-NEXT:    s_mov_b32 s16, 0
1569; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v5
1570; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v2
1571; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, s5
1572; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, s4
1573; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[10:11], v[6:7], v[8:9]
1574; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v4, v1
1575; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v6, v0
1576; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, 32
1577; GFX9-G-O0-NEXT:    v_add_u32_e64 v6, v6, v7
1578; GFX9-G-O0-NEXT:    v_min_u32_e64 v4, v4, v6
1579; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s12
1580; GFX9-G-O0-NEXT:    v_add_u32_e64 v6, v4, v6
1581; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v4, v2
1582; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v7, v5
1583; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, 32
1584; GFX9-G-O0-NEXT:    v_add_u32_e64 v7, v7, v8
1585; GFX9-G-O0-NEXT:    v_min_u32_e64 v4, v4, v7
1586; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v6, s[10:11]
1587; GFX9-G-O0-NEXT:    s_mov_b32 s15, 0
1588; GFX9-G-O0-NEXT:    s_mov_b32 s13, 0
1589; GFX9-G-O0-NEXT:    s_mov_b32 s14, 0
1590; GFX9-G-O0-NEXT:    s_mov_b32 s12, 0
1591; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v6, s[10:11], v3, v4
1592; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
1593; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, s16
1594; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s16
1595; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v3, s[10:11], v3, v4, s[10:11]
1596; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
1597; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s15
1598; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s14
1599; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v8, s[10:11], v4, v7, s[10:11]
1600; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
1601; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s13
1602; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s12
1603; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v7, s[10:11], v4, v7, s[10:11]
1604; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
1605; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v6
1606; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v3
1607; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v8
1608; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v7
1609; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, s9
1610; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, s8
1611; GFX9-G-O0-NEXT:    v_cmp_gt_u64_e64 s[12:13], v[11:12], v[13:14]
1612; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, s5
1613; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s4
1614; GFX9-G-O0-NEXT:    v_cmp_gt_u64_e64 s[10:11], v[9:10], v[11:12]
1615; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, s5
1616; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s4
1617; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[9:10], v[11:12]
1618; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, 1
1619; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, 0
1620; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v9, v4, v9, s[12:13]
1621; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, 1
1622; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, 0
1623; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[10:11]
1624; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v9, v4, v9, s[8:9]
1625; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, 1
1626; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, 0
1627; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[6:7]
1628; GFX9-G-O0-NEXT:    v_or_b32_e64 v4, v4, v9
1629; GFX9-G-O0-NEXT:    s_mov_b32 s7, 0x7f
1630; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0
1631; GFX9-G-O0-NEXT:    v_xor_b32_e64 v6, v6, s7
1632; GFX9-G-O0-NEXT:    v_xor_b32_e64 v3, v3, s6
1633; GFX9-G-O0-NEXT:    v_or_b32_e64 v6, v6, v8
1634; GFX9-G-O0-NEXT:    v_or_b32_e64 v3, v3, v7
1635; GFX9-G-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
1636; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v3
1637; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, s5
1638; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, s4
1639; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[6:7], v[8:9]
1640; GFX9-G-O0-NEXT:    v_and_b32_e32 v3, 1, v4
1641; GFX9-G-O0-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v3
1642; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, 0
1643; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, 0
1644; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v0, v0, v6, s[6:7]
1645; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v3, v1, v3, s[6:7]
1646; GFX9-G-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
1647; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v3
1648; GFX9-G-O0-NEXT:    v_and_b32_e32 v3, 1, v4
1649; GFX9-G-O0-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v3
1650; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, 0
1651; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, 0
1652; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[6:7]
1653; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[6:7]
1654; GFX9-G-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
1655; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v2
1656; GFX9-G-O0-NEXT:    ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
1657; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v5
1658; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v6
1659; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, 1
1660; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, 0
1661; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[4:5]
1662; GFX9-G-O0-NEXT:    v_or_b32_e64 v4, v4, v5
1663; GFX9-G-O0-NEXT:    v_and_b32_e32 v4, 1, v4
1664; GFX9-G-O0-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v4
1665; GFX9-G-O0-NEXT:    s_mov_b64 s[6:7], -1
1666; GFX9-G-O0-NEXT:    s_xor_b64 s[6:7], s[4:5], s[6:7]
1667; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
1668; GFX9-G-O0-NEXT:    s_nop 0
1669; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
1670; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
1671; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
1672; GFX9-G-O0-NEXT:    s_mov_b64 s[4:5], exec
1673; GFX9-G-O0-NEXT:    ; implicit-def: $vgpr34 : SGPR spill to VGPR lane
1674; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s4, 0
1675; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s5, 1
1676; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
1677; GFX9-G-O0-NEXT:    buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
1678; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
1679; GFX9-G-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
1680; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[4:5]
1681; GFX9-G-O0-NEXT:    s_cbranch_execz .LBB0_3
1682; GFX9-G-O0-NEXT:    s_branch .LBB0_8
1683; GFX9-G-O0-NEXT:  .LBB0_1: ; %Flow
1684; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
1685; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
1686; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
1687; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
1688; GFX9-G-O0-NEXT:    v_readlane_b32 s4, v34, 2
1689; GFX9-G-O0-NEXT:    v_readlane_b32 s5, v34, 3
1690; GFX9-G-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
1691; GFX9-G-O0-NEXT:  ; %bb.2: ; %Flow
1692; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
1693; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
1694; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
1695; GFX9-G-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
1696; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
1697; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
1698; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
1699; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
1700; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(7)
1701; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
1702; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(7)
1703; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
1704; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(7)
1705; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
1706; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(7)
1707; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
1708; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(7)
1709; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
1710; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(7)
1711; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
1712; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(7)
1713; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
1714; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(7)
1715; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
1716; GFX9-G-O0-NEXT:    s_branch .LBB0_5
1717; GFX9-G-O0-NEXT:  .LBB0_3: ; %Flow2
1718; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
1719; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
1720; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
1721; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
1722; GFX9-G-O0-NEXT:    v_readlane_b32 s4, v34, 0
1723; GFX9-G-O0-NEXT:    v_readlane_b32 s5, v34, 1
1724; GFX9-G-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
1725; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
1726; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
1727; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
1728; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
1729; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(3)
1730; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
1731; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(3)
1732; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
1733; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(3)
1734; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
1735; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(3)
1736; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
1737; GFX9-G-O0-NEXT:    s_branch .LBB0_9
1738; GFX9-G-O0-NEXT:  .LBB0_4: ; %udiv-loop-exit
1739; GFX9-G-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
1740; GFX9-G-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
1741; GFX9-G-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
1742; GFX9-G-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
1743; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
1744; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
1745; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
1746; GFX9-G-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
1747; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(2)
1748; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v4
1749; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v5
1750; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
1751; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v6
1752; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v7
1753; GFX9-G-O0-NEXT:    s_mov_b32 s4, 1
1754; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s4
1755; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[10:11], v0, v[2:3]
1756; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s4
1757; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[0:1], v0, v[4:5]
1758; GFX9-G-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr2 killed $exec
1759; GFX9-G-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 killed $vgpr2_vgpr3 killed $exec
1760; GFX9-G-O0-NEXT:    s_mov_b32 s4, 31
1761; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s4
1762; GFX9-G-O0-NEXT:    v_lshrrev_b32_e64 v6, v2, v3
1763; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, 0
1764; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v0
1765; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v1
1766; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v14
1767; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v15
1768; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v16
1769; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v17
1770; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v12
1771; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v13
1772; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v10
1773; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v11
1774; GFX9-G-O0-NEXT:    v_or_b32_e64 v0, v0, v7
1775; GFX9-G-O0-NEXT:    v_or_b32_e64 v5, v1, v5
1776; GFX9-G-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
1777; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v5
1778; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v8
1779; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v9
1780; GFX9-G-O0-NEXT:    v_or3_b32 v4, v4, v6, v7
1781; GFX9-G-O0-NEXT:    v_or3_b32 v2, v2, v3, v5
1782; GFX9-G-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
1783; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v2
1784; GFX9-G-O0-NEXT:    ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
1785; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v4
1786; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v5
1787; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
1788; GFX9-G-O0-NEXT:    s_nop 0
1789; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
1790; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
1791; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
1792; GFX9-G-O0-NEXT:    s_branch .LBB0_3
1793; GFX9-G-O0-NEXT:  .LBB0_5: ; %Flow1
1794; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
1795; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
1796; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
1797; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
1798; GFX9-G-O0-NEXT:    v_readlane_b32 s4, v34, 4
1799; GFX9-G-O0-NEXT:    v_readlane_b32 s5, v34, 5
1800; GFX9-G-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
1801; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
1802; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
1803; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
1804; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
1805; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
1806; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
1807; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
1808; GFX9-G-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
1809; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(3)
1810; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
1811; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(3)
1812; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
1813; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(3)
1814; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
1815; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(3)
1816; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
1817; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
1818; GFX9-G-O0-NEXT:    s_nop 0
1819; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
1820; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
1821; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
1822; GFX9-G-O0-NEXT:    s_branch .LBB0_4
1823; GFX9-G-O0-NEXT:  .LBB0_6: ; %udiv-do-while
1824; GFX9-G-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
1825; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
1826; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
1827; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
1828; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
1829; GFX9-G-O0-NEXT:    v_readlane_b32 s6, v34, 6
1830; GFX9-G-O0-NEXT:    v_readlane_b32 s7, v34, 7
1831; GFX9-G-O0-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
1832; GFX9-G-O0-NEXT:    buffer_load_dword v23, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
1833; GFX9-G-O0-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
1834; GFX9-G-O0-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
1835; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
1836; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
1837; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
1838; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
1839; GFX9-G-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
1840; GFX9-G-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
1841; GFX9-G-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
1842; GFX9-G-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
1843; GFX9-G-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
1844; GFX9-G-O0-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
1845; GFX9-G-O0-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
1846; GFX9-G-O0-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
1847; GFX9-G-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
1848; GFX9-G-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
1849; GFX9-G-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
1850; GFX9-G-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
1851; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
1852; GFX9-G-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
1853; GFX9-G-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
1854; GFX9-G-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
1855; GFX9-G-O0-NEXT:    s_mov_b64 s[4:5], 0
1856; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(18)
1857; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v2
1858; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v3
1859; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(16)
1860; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v4
1861; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v5
1862; GFX9-G-O0-NEXT:    s_mov_b32 s8, 1
1863; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s8
1864; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[14:15], v2, v[0:1]
1865; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s8
1866; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[4:5], v2, v[3:4]
1867; GFX9-G-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr0 killed $exec
1868; GFX9-G-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec
1869; GFX9-G-O0-NEXT:    s_mov_b32 s9, 31
1870; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s9
1871; GFX9-G-O0-NEXT:    v_lshrrev_b32_e64 v3, v0, v1
1872; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, 0
1873; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v4
1874; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v5
1875; GFX9-G-O0-NEXT:    v_or_b32_e64 v7, v2, v3
1876; GFX9-G-O0-NEXT:    v_or_b32_e64 v5, v0, v1
1877; GFX9-G-O0-NEXT:    ; kill: def $vgpr0_vgpr1 killed $vgpr22_vgpr23 killed $exec
1878; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v24
1879; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v25
1880; GFX9-G-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr0 killed $exec
1881; GFX9-G-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec
1882; GFX9-G-O0-NEXT:    s_mov_b32 s9, 31
1883; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s9
1884; GFX9-G-O0-NEXT:    v_lshrrev_b32_e64 v3, v0, v1
1885; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, 0
1886; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v14
1887; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v15
1888; GFX9-G-O0-NEXT:    v_or_b32_e64 v4, v2, v3
1889; GFX9-G-O0-NEXT:    v_or_b32_e64 v9, v0, v1
1890; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v22
1891; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v23
1892; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v24
1893; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v25
1894; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s8
1895; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[26:27], v0, v[2:3]
1896; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s8
1897; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[0:1], v0, v[14:15]
1898; GFX9-G-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr2 killed $exec
1899; GFX9-G-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 killed $vgpr2_vgpr3 killed $exec
1900; GFX9-G-O0-NEXT:    s_mov_b32 s8, 31
1901; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s8
1902; GFX9-G-O0-NEXT:    v_lshrrev_b32_e64 v22, v2, v3
1903; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, 0
1904; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v0
1905; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v1
1906; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(10)
1907; GFX9-G-O0-NEXT:    v_mov_b32_e32 v28, v30
1908; GFX9-G-O0-NEXT:    v_mov_b32_e32 v29, v31
1909; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(8)
1910; GFX9-G-O0-NEXT:    v_mov_b32_e32 v24, v32
1911; GFX9-G-O0-NEXT:    v_mov_b32_e32 v25, v33
1912; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v28
1913; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v29
1914; GFX9-G-O0-NEXT:    v_mov_b32_e32 v23, v26
1915; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v27
1916; GFX9-G-O0-NEXT:    v_or_b32_e64 v0, v0, v23
1917; GFX9-G-O0-NEXT:    v_or_b32_e64 v15, v1, v15
1918; GFX9-G-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
1919; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v15
1920; GFX9-G-O0-NEXT:    v_mov_b32_e32 v23, v24
1921; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v25
1922; GFX9-G-O0-NEXT:    v_or3_b32 v14, v14, v22, v23
1923; GFX9-G-O0-NEXT:    v_or3_b32 v2, v2, v3, v15
1924; GFX9-G-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
1925; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v2
1926; GFX9-G-O0-NEXT:    ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
1927; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v14
1928; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v15
1929; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
1930; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v13, s[8:9], v13, v4
1931; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v12, s[8:9], v12, v9, s[8:9]
1932; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v10, s[8:9], v10, v7, s[8:9]
1933; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v12, s[8:9], v6, v5, s[8:9]
1934; GFX9-G-O0-NEXT:    s_mov_b32 s8, 31
1935; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s8
1936; GFX9-G-O0-NEXT:    v_ashrrev_i32_e64 v10, v6, v12
1937; GFX9-G-O0-NEXT:    s_mov_b32 s8, 31
1938; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s8
1939; GFX9-G-O0-NEXT:    v_ashrrev_i32_e64 v6, v6, v12
1940; GFX9-G-O0-NEXT:    s_mov_b32 s9, 1
1941; GFX9-G-O0-NEXT:    s_mov_b32 s8, 0
1942; GFX9-G-O0-NEXT:    v_and_b32_e64 v12, v10, s9
1943; GFX9-G-O0-NEXT:    v_and_b32_e64 v14, v10, s8
1944; GFX9-G-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
1945; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v14
1946; GFX9-G-O0-NEXT:    v_mov_b32_e32 v23, s5
1947; GFX9-G-O0-NEXT:    v_mov_b32_e32 v22, s4
1948; GFX9-G-O0-NEXT:    ; kill: def $vgpr12_vgpr13 killed $vgpr12_vgpr13 def $vgpr12_vgpr13_vgpr14_vgpr15 killed $exec
1949; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v22
1950; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v23
1951; GFX9-G-O0-NEXT:    v_and_b32_e64 v11, v10, v11
1952; GFX9-G-O0-NEXT:    v_and_b32_e64 v10, v10, v21
1953; GFX9-G-O0-NEXT:    v_and_b32_e64 v8, v6, v8
1954; GFX9-G-O0-NEXT:    v_and_b32_e64 v6, v6, v20
1955; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v4, s[8:9], v4, v11
1956; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v10, s[8:9], v9, v10, s[8:9]
1957; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v9, s[8:9], v7, v8, s[8:9]
1958; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v8, s[8:9], v5, v6, s[8:9]
1959; GFX9-G-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec
1960; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v10
1961; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v9
1962; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v8
1963; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v16
1964; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v17
1965; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v18
1966; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v19
1967; GFX9-G-O0-NEXT:    s_mov_b32 s8, -1
1968; GFX9-G-O0-NEXT:    s_mov_b32 s12, -1
1969; GFX9-G-O0-NEXT:    s_mov_b32 s11, -1
1970; GFX9-G-O0-NEXT:    s_mov_b32 s10, -1
1971; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, s8
1972; GFX9-G-O0-NEXT:    v_add_co_u32_e64 v16, s[8:9], v11, v16
1973; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s12
1974; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v17, s[8:9], v10, v11, s[8:9]
1975; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, s11
1976; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v19, s[8:9], v9, v10, s[8:9]
1977; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, s10
1978; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v18, s[8:9], v8, v9, s[8:9]
1979; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v16
1980; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v17
1981; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v19
1982; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v18
1983; GFX9-G-O0-NEXT:    v_or_b32_e64 v16, v16, v19
1984; GFX9-G-O0-NEXT:    v_or_b32_e64 v18, v17, v18
1985; GFX9-G-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
1986; GFX9-G-O0-NEXT:    v_mov_b32_e32 v17, v18
1987; GFX9-G-O0-NEXT:    v_mov_b32_e32 v19, s5
1988; GFX9-G-O0-NEXT:    v_mov_b32_e32 v18, s4
1989; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[16:17], v[18:19]
1990; GFX9-G-O0-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
1991; GFX9-G-O0-NEXT:    v_mov_b32_e32 v19, v3
1992; GFX9-G-O0-NEXT:    v_mov_b32_e32 v18, v2
1993; GFX9-G-O0-NEXT:    v_mov_b32_e32 v17, v1
1994; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, v0
1995; GFX9-G-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
1996; GFX9-G-O0-NEXT:    s_nop 0
1997; GFX9-G-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
1998; GFX9-G-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
1999; GFX9-G-O0-NEXT:    buffer_store_dword v19, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
2000; GFX9-G-O0-NEXT:    v_mov_b32_e32 v19, v15
2001; GFX9-G-O0-NEXT:    v_mov_b32_e32 v18, v14
2002; GFX9-G-O0-NEXT:    v_mov_b32_e32 v17, v13
2003; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, v12
2004; GFX9-G-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
2005; GFX9-G-O0-NEXT:    s_nop 0
2006; GFX9-G-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
2007; GFX9-G-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
2008; GFX9-G-O0-NEXT:    buffer_store_dword v19, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
2009; GFX9-G-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
2010; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s6, 2
2011; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s7, 3
2012; GFX9-G-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
2013; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s6, 6
2014; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s7, 7
2015; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
2016; GFX9-G-O0-NEXT:    buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
2017; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
2018; GFX9-G-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
2019; GFX9-G-O0-NEXT:    s_nop 0
2020; GFX9-G-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
2021; GFX9-G-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
2022; GFX9-G-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
2023; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
2024; GFX9-G-O0-NEXT:    s_nop 0
2025; GFX9-G-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
2026; GFX9-G-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
2027; GFX9-G-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
2028; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
2029; GFX9-G-O0-NEXT:    s_nop 0
2030; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
2031; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
2032; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
2033; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
2034; GFX9-G-O0-NEXT:    s_nop 0
2035; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
2036; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
2037; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
2038; GFX9-G-O0-NEXT:    s_andn2_b64 exec, exec, s[4:5]
2039; GFX9-G-O0-NEXT:    s_cbranch_execnz .LBB0_6
2040; GFX9-G-O0-NEXT:    s_branch .LBB0_1
2041; GFX9-G-O0-NEXT:  .LBB0_7: ; %udiv-preheader
2042; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
2043; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
2044; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
2045; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
2046; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
2047; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
2048; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
2049; GFX9-G-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
2050; GFX9-G-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
2051; GFX9-G-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
2052; GFX9-G-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
2053; GFX9-G-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
2054; GFX9-G-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
2055; GFX9-G-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
2056; GFX9-G-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
2057; GFX9-G-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
2058; GFX9-G-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
2059; GFX9-G-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
2060; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
2061; GFX9-G-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
2062; GFX9-G-O0-NEXT:    s_mov_b32 s4, 64
2063; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(3)
2064; GFX9-G-O0-NEXT:    v_mov_b32_e32 v22, v17
2065; GFX9-G-O0-NEXT:    v_mov_b32_e32 v23, v16
2066; GFX9-G-O0-NEXT:    ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
2067; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(1)
2068; GFX9-G-O0-NEXT:    v_mov_b32_e32 v21, v4
2069; GFX9-G-O0-NEXT:    s_mov_b32 s5, 0xffffffc0
2070; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s5
2071; GFX9-G-O0-NEXT:    v_add_u32_e64 v4, v18, v4
2072; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s4
2073; GFX9-G-O0-NEXT:    v_sub_u32_e64 v5, v5, v18
2074; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0
2075; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s4
2076; GFX9-G-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v18, v6
2077; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s6
2078; GFX9-G-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v18, v6
2079; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
2080; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[6:7], v18, v[20:21]
2081; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[25:26], v18, v[22:23]
2082; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[23:24], v5, v[20:21]
2083; GFX9-G-O0-NEXT:    v_mov_b32_e32 v19, v25
2084; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v26
2085; GFX9-G-O0-NEXT:    v_mov_b32_e32 v22, v23
2086; GFX9-G-O0-NEXT:    v_mov_b32_e32 v18, v24
2087; GFX9-G-O0-NEXT:    v_or_b32_e64 v19, v19, v22
2088; GFX9-G-O0-NEXT:    v_or_b32_e64 v18, v5, v18
2089; GFX9-G-O0-NEXT:    s_mov_b64 s[8:9], 0
2090; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[20:21], v4, v[20:21]
2091; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v20
2092; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v21
2093; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v19, s[4:5]
2094; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v18, s[4:5]
2095; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v17, s[6:7]
2096; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v16, v5, v16, s[6:7]
2097; GFX9-G-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
2098; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v16
2099; GFX9-G-O0-NEXT:    v_mov_b32_e32 v17, v6
2100; GFX9-G-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec
2101; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, 0
2102; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, 0
2103; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v16, v16, v17, s[4:5]
2104; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[4:5]
2105; GFX9-G-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
2106; GFX9-G-O0-NEXT:    v_mov_b32_e32 v17, v6
2107; GFX9-G-O0-NEXT:    ; kill: def $vgpr4_vgpr5 killed $vgpr4_vgpr5 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec
2108; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v16
2109; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v17
2110; GFX9-G-O0-NEXT:    s_mov_b32 s4, -1
2111; GFX9-G-O0-NEXT:    s_mov_b32 s10, -1
2112; GFX9-G-O0-NEXT:    s_mov_b32 s7, -1
2113; GFX9-G-O0-NEXT:    s_mov_b32 s6, -1
2114; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, s4
2115; GFX9-G-O0-NEXT:    v_add_co_u32_e64 v15, s[4:5], v15, v16
2116; GFX9-G-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
2117; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, s10
2118; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v14, s[4:5], v14, v15, s[4:5]
2119; GFX9-G-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
2120; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, s7
2121; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v13, s[4:5], v13, v14, s[4:5]
2122; GFX9-G-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
2123; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, s6
2124; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v12, s[4:5], v12, v13, s[4:5]
2125; GFX9-G-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
2126; GFX9-G-O0-NEXT:    s_mov_b64 s[4:5], s[8:9]
2127; GFX9-G-O0-NEXT:    s_mov_b64 s[6:7], s[8:9]
2128; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s8, 6
2129; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s9, 7
2130; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
2131; GFX9-G-O0-NEXT:    buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
2132; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
2133; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, s7
2134; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, s6
2135; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, s5
2136; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, s4
2137; GFX9-G-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
2138; GFX9-G-O0-NEXT:    s_nop 0
2139; GFX9-G-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
2140; GFX9-G-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
2141; GFX9-G-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
2142; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
2143; GFX9-G-O0-NEXT:    s_nop 0
2144; GFX9-G-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
2145; GFX9-G-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
2146; GFX9-G-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
2147; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
2148; GFX9-G-O0-NEXT:    s_nop 0
2149; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
2150; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
2151; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
2152; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
2153; GFX9-G-O0-NEXT:    s_nop 0
2154; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
2155; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
2156; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
2157; GFX9-G-O0-NEXT:    s_branch .LBB0_6
2158; GFX9-G-O0-NEXT:  .LBB0_8: ; %udiv-bb1
2159; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
2160; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
2161; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
2162; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
2163; GFX9-G-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
2164; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
2165; GFX9-G-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
2166; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
2167; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
2168; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
2169; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
2170; GFX9-G-O0-NEXT:    s_mov_b64 s[4:5], 0
2171; GFX9-G-O0-NEXT:    s_mov_b32 s6, 1
2172; GFX9-G-O0-NEXT:    s_mov_b32 s10, 0
2173; GFX9-G-O0-NEXT:    s_mov_b32 s9, 0
2174; GFX9-G-O0-NEXT:    s_mov_b32 s8, 0
2175; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s6
2176; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(3)
2177; GFX9-G-O0-NEXT:    v_add_co_u32_e64 v4, s[6:7], v2, v4
2178; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
2179; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s10
2180; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(1)
2181; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v5, s[6:7], v5, v7, s[6:7]
2182; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s9
2183; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v7, s[6:7], v6, v7, s[6:7]
2184; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s8
2185; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v6, s[6:7], v1, v6, s[6:7]
2186; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v4
2187; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v5
2188; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v7
2189; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, v6
2190; GFX9-G-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
2191; GFX9-G-O0-NEXT:    s_nop 0
2192; GFX9-G-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
2193; GFX9-G-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
2194; GFX9-G-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
2195; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0x7f
2196; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s6
2197; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v8, s[6:7], v1, v2
2198; GFX9-G-O0-NEXT:    s_mov_b32 s7, 64
2199; GFX9-G-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
2200; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v0
2201; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v9
2202; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v3
2203; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0xffffffc0
2204; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s6
2205; GFX9-G-O0-NEXT:    v_add_u32_e64 v2, v8, v0
2206; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s7
2207; GFX9-G-O0-NEXT:    v_sub_u32_e64 v14, v0, v8
2208; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0
2209; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s7
2210; GFX9-G-O0-NEXT:    v_cmp_lt_u32_e64 s[8:9], v8, v0
2211; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s6
2212; GFX9-G-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v8, v0
2213; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[0:1], v8, v[12:13]
2214; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[17:18], v14, v[12:13]
2215; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[15:16], v8, v[10:11]
2216; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v17
2217; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v18
2218; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v15
2219; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v16
2220; GFX9-G-O0-NEXT:    v_or_b32_e64 v11, v11, v14
2221; GFX9-G-O0-NEXT:    v_or_b32_e64 v10, v8, v10
2222; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[12:13], v2, v[12:13]
2223; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v0
2224; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v1
2225; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, 0
2226; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, 0
2227; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v0, v0, v8, s[8:9]
2228; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[8:9]
2229; GFX9-G-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
2230; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v2
2231; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v12
2232; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v13
2233; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v8, v8, v11, s[8:9]
2234; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s[8:9]
2235; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v8, v8, v9, s[6:7]
2236; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[6:7]
2237; GFX9-G-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
2238; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v2
2239; GFX9-G-O0-NEXT:    ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
2240; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v8
2241; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v9
2242; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
2243; GFX9-G-O0-NEXT:    s_nop 0
2244; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
2245; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
2246; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
2247; GFX9-G-O0-NEXT:    s_mov_b64 s[8:9], s[4:5]
2248; GFX9-G-O0-NEXT:    s_mov_b64 s[10:11], s[4:5]
2249; GFX9-G-O0-NEXT:    v_or_b32_e64 v4, v4, v7
2250; GFX9-G-O0-NEXT:    v_or_b32_e64 v6, v5, v6
2251; GFX9-G-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
2252; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v6
2253; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s5
2254; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s4
2255; GFX9-G-O0-NEXT:    v_cmp_ne_u64_e64 s[4:5], v[4:5], v[6:7]
2256; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
2257; GFX9-G-O0-NEXT:    s_nop 0
2258; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
2259; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
2260; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
2261; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s8
2262; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s9
2263; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s10
2264; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, s11
2265; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
2266; GFX9-G-O0-NEXT:    s_nop 0
2267; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
2268; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
2269; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
2270; GFX9-G-O0-NEXT:    s_mov_b64 s[6:7], exec
2271; GFX9-G-O0-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
2272; GFX9-G-O0-NEXT:    s_xor_b64 s[6:7], s[4:5], s[6:7]
2273; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s6, 4
2274; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s7, 5
2275; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
2276; GFX9-G-O0-NEXT:    buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
2277; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
2278; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[4:5]
2279; GFX9-G-O0-NEXT:    s_cbranch_execz .LBB0_5
2280; GFX9-G-O0-NEXT:    s_branch .LBB0_7
2281; GFX9-G-O0-NEXT:  .LBB0_9: ; %udiv-end
2282; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
2283; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
2284; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
2285; GFX9-G-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
2286; GFX9-G-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
2287; GFX9-G-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
2288; GFX9-G-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
2289; GFX9-G-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
2290; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(2)
2291; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v8
2292; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v9
2293; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
2294; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v10
2295; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v11
2296; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v1
2297; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v2
2298; GFX9-G-O0-NEXT:    v_xor_b32_e64 v0, v0, v7
2299; GFX9-G-O0-NEXT:    v_xor_b32_e64 v1, v1, v6
2300; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v8
2301; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v9
2302; GFX9-G-O0-NEXT:    v_xor_b32_e64 v2, v2, v5
2303; GFX9-G-O0-NEXT:    v_xor_b32_e64 v3, v3, v4
2304; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v0, s[4:5], v0, v7
2305; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v1, s[4:5], v1, v6, s[4:5]
2306; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v2, s[4:5], v2, v5, s[4:5]
2307; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v3, s[4:5], v3, v4, s[4:5]
2308; GFX9-G-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
2309; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
2310; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[4:5]
2311; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
2312; GFX9-G-O0-NEXT:    s_setpc_b64 s[30:31]
2313  %div = sdiv i128 %lhs, %rhs
2314  ret i128 %div
2315}
2316
2317define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
2318; GFX9-LABEL: v_udiv_i128_vv:
2319; GFX9:       ; %bb.0: ; %_udiv-special-cases
2320; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2321; GFX9-NEXT:    v_or_b32_e32 v9, v5, v7
2322; GFX9-NEXT:    v_or_b32_e32 v8, v4, v6
2323; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[8:9]
2324; GFX9-NEXT:    v_or_b32_e32 v9, v1, v3
2325; GFX9-NEXT:    v_or_b32_e32 v8, v0, v2
2326; GFX9-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[8:9]
2327; GFX9-NEXT:    v_ffbh_u32_e32 v8, v6
2328; GFX9-NEXT:    v_add_u32_e32 v8, 32, v8
2329; GFX9-NEXT:    v_ffbh_u32_e32 v9, v7
2330; GFX9-NEXT:    v_min_u32_e32 v8, v8, v9
2331; GFX9-NEXT:    v_ffbh_u32_e32 v9, v4
2332; GFX9-NEXT:    v_add_u32_e32 v9, 32, v9
2333; GFX9-NEXT:    v_ffbh_u32_e32 v10, v5
2334; GFX9-NEXT:    v_min_u32_e32 v9, v9, v10
2335; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
2336; GFX9-NEXT:    v_add_co_u32_e32 v9, vcc, 64, v9
2337; GFX9-NEXT:    v_addc_co_u32_e64 v10, s[6:7], 0, 0, vcc
2338; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
2339; GFX9-NEXT:    v_ffbh_u32_e32 v11, v3
2340; GFX9-NEXT:    v_cndmask_b32_e32 v8, v9, v8, vcc
2341; GFX9-NEXT:    v_ffbh_u32_e32 v9, v2
2342; GFX9-NEXT:    v_add_u32_e32 v9, 32, v9
2343; GFX9-NEXT:    v_min_u32_e32 v9, v9, v11
2344; GFX9-NEXT:    v_ffbh_u32_e32 v11, v0
2345; GFX9-NEXT:    v_add_u32_e32 v11, 32, v11
2346; GFX9-NEXT:    v_ffbh_u32_e32 v12, v1
2347; GFX9-NEXT:    v_min_u32_e32 v11, v11, v12
2348; GFX9-NEXT:    v_cndmask_b32_e64 v10, v10, 0, vcc
2349; GFX9-NEXT:    v_add_co_u32_e32 v11, vcc, 64, v11
2350; GFX9-NEXT:    v_addc_co_u32_e64 v12, s[6:7], 0, 0, vcc
2351; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
2352; GFX9-NEXT:    s_mov_b64 s[6:7], 0x7f
2353; GFX9-NEXT:    v_cndmask_b32_e32 v9, v11, v9, vcc
2354; GFX9-NEXT:    v_cndmask_b32_e64 v13, v12, 0, vcc
2355; GFX9-NEXT:    v_sub_co_u32_e32 v12, vcc, v8, v9
2356; GFX9-NEXT:    v_subb_co_u32_e32 v13, vcc, v10, v13, vcc
2357; GFX9-NEXT:    v_mov_b32_e32 v8, 0
2358; GFX9-NEXT:    v_subbrev_co_u32_e32 v14, vcc, 0, v8, vcc
2359; GFX9-NEXT:    v_subbrev_co_u32_e32 v15, vcc, 0, v8, vcc
2360; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[12:13]
2361; GFX9-NEXT:    v_or_b32_e32 v10, v13, v15
2362; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
2363; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[14:15]
2364; GFX9-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2365; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
2366; GFX9-NEXT:    v_cndmask_b32_e32 v8, v9, v8, vcc
2367; GFX9-NEXT:    v_and_b32_e32 v8, 1, v8
2368; GFX9-NEXT:    v_xor_b32_e32 v9, 0x7f, v12
2369; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
2370; GFX9-NEXT:    v_or_b32_e32 v9, v9, v14
2371; GFX9-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
2372; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[9:10]
2373; GFX9-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
2374; GFX9-NEXT:    v_cndmask_b32_e64 v8, v3, 0, s[4:5]
2375; GFX9-NEXT:    v_cndmask_b32_e64 v9, v2, 0, s[4:5]
2376; GFX9-NEXT:    v_cndmask_b32_e64 v10, v1, 0, s[4:5]
2377; GFX9-NEXT:    v_cndmask_b32_e64 v11, v0, 0, s[4:5]
2378; GFX9-NEXT:    s_and_b64 s[4:5], s[6:7], vcc
2379; GFX9-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
2380; GFX9-NEXT:    s_cbranch_execz .LBB1_6
2381; GFX9-NEXT:  ; %bb.1: ; %udiv-bb1
2382; GFX9-NEXT:    v_add_co_u32_e32 v18, vcc, 1, v12
2383; GFX9-NEXT:    v_addc_co_u32_e32 v19, vcc, 0, v13, vcc
2384; GFX9-NEXT:    v_addc_co_u32_e32 v20, vcc, 0, v14, vcc
2385; GFX9-NEXT:    v_addc_co_u32_e32 v21, vcc, 0, v15, vcc
2386; GFX9-NEXT:    v_sub_u32_e32 v15, 0x7f, v12
2387; GFX9-NEXT:    v_or_b32_e32 v9, v19, v21
2388; GFX9-NEXT:    v_or_b32_e32 v8, v18, v20
2389; GFX9-NEXT:    v_sub_u32_e32 v13, 64, v15
2390; GFX9-NEXT:    v_lshlrev_b64 v[10:11], v15, v[2:3]
2391; GFX9-NEXT:    v_lshrrev_b64 v[13:14], v13, v[0:1]
2392; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[8:9]
2393; GFX9-NEXT:    v_sub_u32_e32 v8, 63, v12
2394; GFX9-NEXT:    v_lshlrev_b64 v[8:9], v8, v[0:1]
2395; GFX9-NEXT:    v_or_b32_e32 v11, v11, v14
2396; GFX9-NEXT:    v_or_b32_e32 v10, v10, v13
2397; GFX9-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v15
2398; GFX9-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s[4:5]
2399; GFX9-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s[4:5]
2400; GFX9-NEXT:    v_lshlrev_b64 v[10:11], v15, v[0:1]
2401; GFX9-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v15
2402; GFX9-NEXT:    v_mov_b32_e32 v12, 0
2403; GFX9-NEXT:    v_mov_b32_e32 v14, 0
2404; GFX9-NEXT:    v_cndmask_b32_e64 v9, v9, v3, s[6:7]
2405; GFX9-NEXT:    v_cndmask_b32_e64 v8, v8, v2, s[6:7]
2406; GFX9-NEXT:    v_cndmask_b32_e64 v11, 0, v11, s[4:5]
2407; GFX9-NEXT:    v_mov_b32_e32 v13, 0
2408; GFX9-NEXT:    v_mov_b32_e32 v15, 0
2409; GFX9-NEXT:    v_cndmask_b32_e64 v10, 0, v10, s[4:5]
2410; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
2411; GFX9-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
2412; GFX9-NEXT:    s_cbranch_execz .LBB1_5
2413; GFX9-NEXT:  ; %bb.2: ; %udiv-preheader
2414; GFX9-NEXT:    v_sub_u32_e32 v14, 64, v18
2415; GFX9-NEXT:    v_lshrrev_b64 v[12:13], v18, v[0:1]
2416; GFX9-NEXT:    v_lshlrev_b64 v[14:15], v14, v[2:3]
2417; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v18
2418; GFX9-NEXT:    v_or_b32_e32 v14, v12, v14
2419; GFX9-NEXT:    v_subrev_u32_e32 v12, 64, v18
2420; GFX9-NEXT:    v_or_b32_e32 v15, v13, v15
2421; GFX9-NEXT:    v_lshrrev_b64 v[12:13], v12, v[2:3]
2422; GFX9-NEXT:    v_lshrrev_b64 v[2:3], v18, v[2:3]
2423; GFX9-NEXT:    v_cndmask_b32_e32 v13, v13, v15, vcc
2424; GFX9-NEXT:    v_cndmask_b32_e32 v12, v12, v14, vcc
2425; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
2426; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
2427; GFX9-NEXT:    v_add_co_u32_e32 v22, vcc, -1, v4
2428; GFX9-NEXT:    v_addc_co_u32_e32 v23, vcc, -1, v5, vcc
2429; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v18
2430; GFX9-NEXT:    v_addc_co_u32_e32 v24, vcc, -1, v6, vcc
2431; GFX9-NEXT:    v_mov_b32_e32 v16, 0
2432; GFX9-NEXT:    v_mov_b32_e32 v14, 0
2433; GFX9-NEXT:    v_cndmask_b32_e64 v1, v13, v1, s[4:5]
2434; GFX9-NEXT:    v_cndmask_b32_e64 v0, v12, v0, s[4:5]
2435; GFX9-NEXT:    v_addc_co_u32_e32 v25, vcc, -1, v7, vcc
2436; GFX9-NEXT:    s_mov_b64 s[4:5], 0
2437; GFX9-NEXT:    v_mov_b32_e32 v17, 0
2438; GFX9-NEXT:    v_mov_b32_e32 v15, 0
2439; GFX9-NEXT:    v_mov_b32_e32 v13, 0
2440; GFX9-NEXT:  .LBB1_3: ; %udiv-do-while
2441; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
2442; GFX9-NEXT:    v_lshrrev_b32_e32 v12, 31, v11
2443; GFX9-NEXT:    v_lshlrev_b64 v[10:11], 1, v[10:11]
2444; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
2445; GFX9-NEXT:    v_or_b32_e32 v10, v16, v10
2446; GFX9-NEXT:    v_lshrrev_b32_e32 v16, 31, v1
2447; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
2448; GFX9-NEXT:    v_or_b32_e32 v2, v2, v16
2449; GFX9-NEXT:    v_lshrrev_b32_e32 v16, 31, v9
2450; GFX9-NEXT:    v_or_b32_e32 v0, v0, v16
2451; GFX9-NEXT:    v_sub_co_u32_e32 v16, vcc, v22, v0
2452; GFX9-NEXT:    v_subb_co_u32_e32 v16, vcc, v23, v1, vcc
2453; GFX9-NEXT:    v_subb_co_u32_e32 v16, vcc, v24, v2, vcc
2454; GFX9-NEXT:    v_subb_co_u32_e32 v16, vcc, v25, v3, vcc
2455; GFX9-NEXT:    v_ashrrev_i32_e32 v26, 31, v16
2456; GFX9-NEXT:    v_and_b32_e32 v16, v26, v4
2457; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v16
2458; GFX9-NEXT:    v_and_b32_e32 v16, v26, v5
2459; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v16, vcc
2460; GFX9-NEXT:    v_and_b32_e32 v16, v26, v6
2461; GFX9-NEXT:    v_subb_co_u32_e32 v2, vcc, v2, v16, vcc
2462; GFX9-NEXT:    v_and_b32_e32 v16, v26, v7
2463; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v16, vcc
2464; GFX9-NEXT:    v_add_co_u32_e32 v18, vcc, -1, v18
2465; GFX9-NEXT:    v_addc_co_u32_e32 v19, vcc, -1, v19, vcc
2466; GFX9-NEXT:    v_addc_co_u32_e32 v20, vcc, -1, v20, vcc
2467; GFX9-NEXT:    v_addc_co_u32_e32 v21, vcc, -1, v21, vcc
2468; GFX9-NEXT:    v_or_b32_e32 v11, v17, v11
2469; GFX9-NEXT:    v_lshlrev_b64 v[8:9], 1, v[8:9]
2470; GFX9-NEXT:    v_or_b32_e32 v16, v18, v20
2471; GFX9-NEXT:    v_or_b32_e32 v17, v19, v21
2472; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[16:17]
2473; GFX9-NEXT:    v_or3_b32 v8, v8, v12, v14
2474; GFX9-NEXT:    v_and_b32_e32 v12, 1, v26
2475; GFX9-NEXT:    v_mov_b32_e32 v17, v13
2476; GFX9-NEXT:    v_or3_b32 v9, v9, 0, v15
2477; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
2478; GFX9-NEXT:    v_mov_b32_e32 v16, v12
2479; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
2480; GFX9-NEXT:    s_cbranch_execnz .LBB1_3
2481; GFX9-NEXT:  ; %bb.4: ; %Flow
2482; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
2483; GFX9-NEXT:  .LBB1_5: ; %Flow2
2484; GFX9-NEXT:    s_or_b64 exec, exec, s[6:7]
2485; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 1, v[10:11]
2486; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 1, v[8:9]
2487; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 31, v11
2488; GFX9-NEXT:    v_or3_b32 v8, v3, 0, v15
2489; GFX9-NEXT:    v_or3_b32 v9, v2, v4, v14
2490; GFX9-NEXT:    v_or_b32_e32 v10, v13, v1
2491; GFX9-NEXT:    v_or_b32_e32 v11, v12, v0
2492; GFX9-NEXT:  .LBB1_6: ; %Flow3
2493; GFX9-NEXT:    s_or_b64 exec, exec, s[8:9]
2494; GFX9-NEXT:    v_mov_b32_e32 v0, v11
2495; GFX9-NEXT:    v_mov_b32_e32 v1, v10
2496; GFX9-NEXT:    v_mov_b32_e32 v2, v9
2497; GFX9-NEXT:    v_mov_b32_e32 v3, v8
2498; GFX9-NEXT:    s_setpc_b64 s[30:31]
2499;
2500; GFX9-O0-LABEL: v_udiv_i128_vv:
2501; GFX9-O0:       ; %bb.0: ; %_udiv-special-cases
2502; GFX9-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2503; GFX9-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
2504; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
2505; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
2506; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v6
2507; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
2508; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v2
2509; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v0
2510; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
2511; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2512; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2513; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
2514; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v1
2515; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2516; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2517; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
2518; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v3
2519; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2520; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2521; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
2522; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v5
2523; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2524; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2525; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
2526; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v7
2527; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
2528; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
2529; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
2530; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
2531; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v10
2532; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v11
2533; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
2534; GFX9-O0-NEXT:    s_nop 0
2535; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
2536; GFX9-O0-NEXT:    s_waitcnt vmcnt(2)
2537; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
2538; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v0
2539; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
2540; GFX9-O0-NEXT:    s_nop 0
2541; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
2542; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v12
2543; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v13
2544; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
2545; GFX9-O0-NEXT:    s_nop 0
2546; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
2547; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v8
2548; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v9
2549; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
2550; GFX9-O0-NEXT:    s_nop 0
2551; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
2552; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v11
2553; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v1
2554; GFX9-O0-NEXT:    v_or_b32_e64 v2, v7, v6
2555; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
2556; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v0
2557; GFX9-O0-NEXT:    v_or_b32_e64 v0, v4, v5
2558; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
2559; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
2560; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
2561; GFX9-O0-NEXT:    ; implicit-def: $vgpr30 : SGPR spill to VGPR lane
2562; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 0
2563; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 1
2564; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[0:1], s[6:7]
2565; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v13
2566; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v9
2567; GFX9-O0-NEXT:    v_or_b32_e64 v14, v3, v1
2568; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v12
2569; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v8
2570; GFX9-O0-NEXT:    v_or_b32_e64 v8, v2, v0
2571; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
2572; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v14
2573; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
2574; GFX9-O0-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
2575; GFX9-O0-NEXT:    v_ffbh_u32_e64 v5, v5
2576; GFX9-O0-NEXT:    s_mov_b32 s9, 32
2577; GFX9-O0-NEXT:    v_add_u32_e64 v5, v5, s9
2578; GFX9-O0-NEXT:    v_ffbh_u32_e64 v6, v6
2579; GFX9-O0-NEXT:    v_min_u32_e64 v5, v5, v6
2580; GFX9-O0-NEXT:    s_mov_b32 s8, 0
2581; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
2582; GFX9-O0-NEXT:    v_mov_b32_e32 v8, s8
2583; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
2584; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v8
2585; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v6
2586; GFX9-O0-NEXT:    v_ffbh_u32_e64 v4, v4
2587; GFX9-O0-NEXT:    v_add_u32_e64 v4, v4, s9
2588; GFX9-O0-NEXT:    v_ffbh_u32_e64 v7, v7
2589; GFX9-O0-NEXT:    v_min_u32_e64 v14, v4, v7
2590; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
2591; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
2592; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
2593; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v4
2594; GFX9-O0-NEXT:    s_mov_b64 s[10:11], 64
2595; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v14
2596; GFX9-O0-NEXT:    s_mov_b32 s12, s10
2597; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v15
2598; GFX9-O0-NEXT:    s_mov_b32 s14, s11
2599; GFX9-O0-NEXT:    v_add_co_u32_e64 v7, s[12:13], v7, s12
2600; GFX9-O0-NEXT:    v_mov_b32_e32 v8, s14
2601; GFX9-O0-NEXT:    v_addc_co_u32_e64 v4, s[12:13], v4, v8, s[12:13]
2602; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
2603; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v4
2604; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
2605; GFX9-O0-NEXT:    s_mov_b64 s[12:13], s[6:7]
2606; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[12:13], v[10:11], s[12:13]
2607; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v9, s[12:13]
2608; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
2609; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
2610; GFX9-O0-NEXT:    v_cndmask_b32_e64 v8, v5, v6, s[12:13]
2611; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
2612; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
2613; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
2614; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v4
2615; GFX9-O0-NEXT:    v_ffbh_u32_e64 v4, v0
2616; GFX9-O0-NEXT:    v_add_u32_e64 v4, v4, s9
2617; GFX9-O0-NEXT:    v_ffbh_u32_e64 v5, v1
2618; GFX9-O0-NEXT:    v_min_u32_e64 v5, v4, v5
2619; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
2620; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
2621; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
2622; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
2623; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
2624; GFX9-O0-NEXT:    v_ffbh_u32_e64 v4, v2
2625; GFX9-O0-NEXT:    v_add_u32_e64 v4, v4, s9
2626; GFX9-O0-NEXT:    v_ffbh_u32_e64 v10, v3
2627; GFX9-O0-NEXT:    v_min_u32_e64 v14, v4, v10
2628; GFX9-O0-NEXT:    ; implicit-def: $sgpr9
2629; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
2630; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
2631; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v4
2632; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v14
2633; GFX9-O0-NEXT:    s_mov_b32 s8, s10
2634; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v15
2635; GFX9-O0-NEXT:    s_mov_b32 s10, s11
2636; GFX9-O0-NEXT:    v_add_co_u32_e64 v10, s[8:9], v10, s8
2637; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s10
2638; GFX9-O0-NEXT:    v_addc_co_u32_e64 v4, s[8:9], v4, v11, s[8:9]
2639; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
2640; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v4
2641; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v11
2642; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
2643; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[8:9], v[12:13], s[8:9]
2644; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[8:9]
2645; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
2646; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
2647; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[8:9]
2648; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
2649; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
2650; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
2651; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
2652; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
2653; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
2654; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec
2655; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
2656; GFX9-O0-NEXT:    s_mov_b32 s10, s6
2657; GFX9-O0-NEXT:    s_mov_b32 s11, s7
2658; GFX9-O0-NEXT:    v_sub_co_u32_e32 v4, vcc, v4, v7
2659; GFX9-O0-NEXT:    v_subb_co_u32_e32 v8, vcc, v5, v6, vcc
2660; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s10
2661; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s10
2662; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v5, v6, vcc
2663; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s11
2664; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s11
2665; GFX9-O0-NEXT:    v_subb_co_u32_e32 v6, vcc, v5, v6, vcc
2666; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
2667; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
2668; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
2669; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v8
2670; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2671; GFX9-O0-NEXT:    s_nop 0
2672; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2673; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
2674; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
2675; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
2676; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v6
2677; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2678; GFX9-O0-NEXT:    s_nop 0
2679; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2680; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
2681; GFX9-O0-NEXT:    s_mov_b64 s[12:13], 0x7f
2682; GFX9-O0-NEXT:    v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
2683; GFX9-O0-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[14:15]
2684; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
2685; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[14:15]
2686; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v9, s[8:9]
2687; GFX9-O0-NEXT:    v_and_b32_e64 v6, 1, v6
2688; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[8:9], v6, 1
2689; GFX9-O0-NEXT:    s_or_b64 s[8:9], s[4:5], s[8:9]
2690; GFX9-O0-NEXT:    s_mov_b64 s[4:5], -1
2691; GFX9-O0-NEXT:    s_xor_b64 s[4:5], s[8:9], s[4:5]
2692; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
2693; GFX9-O0-NEXT:    s_mov_b32 s14, s13
2694; GFX9-O0-NEXT:    v_xor_b32_e64 v6, v6, s14
2695; GFX9-O0-NEXT:    ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
2696; GFX9-O0-NEXT:    v_xor_b32_e64 v4, v4, s12
2697; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
2698; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v6
2699; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
2700; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v8
2701; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v9
2702; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
2703; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
2704; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
2705; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
2706; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v6
2707; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7]
2708; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s11
2709; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v1, v4, s[8:9]
2710; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s10
2711; GFX9-O0-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s[8:9]
2712; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
2713; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
2714; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
2715; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v4
2716; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s11
2717; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v3, v4, s[8:9]
2718; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s10
2719; GFX9-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[8:9]
2720; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
2721; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
2722; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
2723; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v4
2724; GFX9-O0-NEXT:    s_and_b64 s[6:7], s[4:5], s[6:7]
2725; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
2726; GFX9-O0-NEXT:    s_nop 0
2727; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
2728; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
2729; GFX9-O0-NEXT:    s_nop 0
2730; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
2731; GFX9-O0-NEXT:    s_mov_b64 s[4:5], exec
2732; GFX9-O0-NEXT:    v_writelane_b32 v30, s4, 2
2733; GFX9-O0-NEXT:    v_writelane_b32 v30, s5, 3
2734; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
2735; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
2736; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
2737; GFX9-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
2738; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
2739; GFX9-O0-NEXT:    s_cbranch_execz .LBB1_3
2740; GFX9-O0-NEXT:    s_branch .LBB1_8
2741; GFX9-O0-NEXT:  .LBB1_1: ; %Flow
2742; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
2743; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
2744; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
2745; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
2746; GFX9-O0-NEXT:    v_readlane_b32 s4, v30, 4
2747; GFX9-O0-NEXT:    v_readlane_b32 s5, v30, 5
2748; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
2749; GFX9-O0-NEXT:  ; %bb.2: ; %Flow
2750; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
2751; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
2752; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
2753; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
2754; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
2755; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
2756; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
2757; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
2758; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
2759; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
2760; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
2761; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
2762; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
2763; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
2764; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
2765; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
2766; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
2767; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
2768; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
2769; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
2770; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
2771; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
2772; GFX9-O0-NEXT:    s_waitcnt vmcnt(7)
2773; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
2774; GFX9-O0-NEXT:    s_branch .LBB1_5
2775; GFX9-O0-NEXT:  .LBB1_3: ; %Flow2
2776; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
2777; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
2778; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
2779; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
2780; GFX9-O0-NEXT:    v_readlane_b32 s4, v30, 2
2781; GFX9-O0-NEXT:    v_readlane_b32 s5, v30, 3
2782; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
2783; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
2784; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
2785; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
2786; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
2787; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
2788; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
2789; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
2790; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
2791; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
2792; GFX9-O0-NEXT:    s_nop 0
2793; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
2794; GFX9-O0-NEXT:    s_branch .LBB1_9
2795; GFX9-O0-NEXT:  .LBB1_4: ; %udiv-loop-exit
2796; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
2797; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
2798; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
2799; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
2800; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
2801; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
2802; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
2803; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
2804; GFX9-O0-NEXT:    s_mov_b32 s4, 1
2805; GFX9-O0-NEXT:    s_waitcnt vmcnt(2)
2806; GFX9-O0-NEXT:    v_lshlrev_b64 v[2:3], s4, v[0:1]
2807; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
2808; GFX9-O0-NEXT:    v_lshlrev_b64 v[9:10], s4, v[9:10]
2809; GFX9-O0-NEXT:    s_mov_b32 s4, 63
2810; GFX9-O0-NEXT:    v_lshrrev_b64 v[0:1], s4, v[0:1]
2811; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v1
2812; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v10
2813; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v8
2814; GFX9-O0-NEXT:    v_or3_b32 v4, v4, v11, v12
2815; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
2816; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v9
2817; GFX9-O0-NEXT:    v_or3_b32 v0, v0, v1, v7
2818; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
2819; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v4
2820; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v3
2821; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v6
2822; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v7
2823; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
2824; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
2825; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
2826; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
2827; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v4
2828; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
2829; GFX9-O0-NEXT:    s_nop 0
2830; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
2831; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
2832; GFX9-O0-NEXT:    s_nop 0
2833; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
2834; GFX9-O0-NEXT:    s_branch .LBB1_3
2835; GFX9-O0-NEXT:  .LBB1_5: ; %Flow1
2836; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
2837; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
2838; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
2839; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
2840; GFX9-O0-NEXT:    v_readlane_b32 s4, v30, 6
2841; GFX9-O0-NEXT:    v_readlane_b32 s5, v30, 7
2842; GFX9-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
2843; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
2844; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
2845; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
2846; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
2847; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
2848; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
2849; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
2850; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
2851; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
2852; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
2853; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
2854; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
2855; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
2856; GFX9-O0-NEXT:    s_nop 0
2857; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
2858; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
2859; GFX9-O0-NEXT:    s_nop 0
2860; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
2861; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
2862; GFX9-O0-NEXT:    s_nop 0
2863; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
2864; GFX9-O0-NEXT:    s_branch .LBB1_4
2865; GFX9-O0-NEXT:  .LBB1_6: ; %udiv-do-while
2866; GFX9-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
2867; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
2868; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
2869; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
2870; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
2871; GFX9-O0-NEXT:    v_readlane_b32 s6, v30, 8
2872; GFX9-O0-NEXT:    v_readlane_b32 s7, v30, 9
2873; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
2874; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
2875; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
2876; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
2877; GFX9-O0-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
2878; GFX9-O0-NEXT:    buffer_load_dword v23, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
2879; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
2880; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
2881; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
2882; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
2883; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
2884; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
2885; GFX9-O0-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
2886; GFX9-O0-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
2887; GFX9-O0-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
2888; GFX9-O0-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
2889; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
2890; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
2891; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
2892; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
2893; GFX9-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
2894; GFX9-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
2895; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
2896; GFX9-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
2897; GFX9-O0-NEXT:    s_mov_b32 s4, 63
2898; GFX9-O0-NEXT:    s_waitcnt vmcnt(16)
2899; GFX9-O0-NEXT:    v_lshrrev_b64 v[28:29], s4, v[2:3]
2900; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v29
2901; GFX9-O0-NEXT:    s_mov_b32 s5, 1
2902; GFX9-O0-NEXT:    v_lshlrev_b64 v[22:23], s5, v[22:23]
2903; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v23
2904; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
2905; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v28
2906; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v22
2907; GFX9-O0-NEXT:    v_or_b32_e64 v22, v5, v10
2908; GFX9-O0-NEXT:    ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
2909; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v4
2910; GFX9-O0-NEXT:    v_lshlrev_b64 v[28:29], s5, v[2:3]
2911; GFX9-O0-NEXT:    v_lshrrev_b64 v[4:5], s4, v[6:7]
2912; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v29
2913; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v5
2914; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
2915; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v28
2916; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
2917; GFX9-O0-NEXT:    v_or_b32_e64 v4, v3, v4
2918; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
2919; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v2
2920; GFX9-O0-NEXT:    v_lshlrev_b64 v[2:3], s5, v[0:1]
2921; GFX9-O0-NEXT:    v_lshlrev_b64 v[28:29], s5, v[6:7]
2922; GFX9-O0-NEXT:    v_lshrrev_b64 v[0:1], s4, v[0:1]
2923; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v1
2924; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v29
2925; GFX9-O0-NEXT:    s_waitcnt vmcnt(10)
2926; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v27
2927; GFX9-O0-NEXT:    v_or3_b32 v6, v6, v7, v10
2928; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
2929; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v28
2930; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v26
2931; GFX9-O0-NEXT:    v_or3_b32 v0, v0, v1, v7
2932; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
2933; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v6
2934; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v3
2935; GFX9-O0-NEXT:    s_waitcnt vmcnt(8)
2936; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v25
2937; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v7
2938; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
2939; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v24
2940; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
2941; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
2942; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v6
2943; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
2944; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v5
2945; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v22
2946; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v23
2947; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
2948; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v11
2949; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v14
2950; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v15
2951; GFX9-O0-NEXT:    v_sub_co_u32_e32 v13, vcc, v13, v6
2952; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
2953; GFX9-O0-NEXT:    v_subb_co_u32_e32 v12, vcc, v12, v10, vcc
2954; GFX9-O0-NEXT:    v_subb_co_u32_e32 v11, vcc, v11, v4, vcc
2955; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v7, v5, vcc
2956; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
2957; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
2958; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
2959; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v7
2960; GFX9-O0-NEXT:    v_ashrrev_i64 v[13:14], s4, v[11:12]
2961; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v14
2962; GFX9-O0-NEXT:    s_mov_b64 s[4:5], 1
2963; GFX9-O0-NEXT:    s_mov_b32 s8, s5
2964; GFX9-O0-NEXT:    v_and_b32_e64 v12, v7, s8
2965; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v13
2966; GFX9-O0-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
2967; GFX9-O0-NEXT:    v_and_b32_e64 v14, v11, s4
2968; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
2969; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v12
2970; GFX9-O0-NEXT:    v_mov_b32_e32 v12, 0
2971; GFX9-O0-NEXT:    v_mov_b32_e32 v13, 0
2972; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v21
2973; GFX9-O0-NEXT:    v_and_b32_e64 v22, v7, v22
2974; GFX9-O0-NEXT:    v_and_b32_e64 v20, v11, v20
2975; GFX9-O0-NEXT:    ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
2976; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v22
2977; GFX9-O0-NEXT:    v_mov_b32_e32 v22, v19
2978; GFX9-O0-NEXT:    v_and_b32_e64 v7, v7, v22
2979; GFX9-O0-NEXT:    v_and_b32_e64 v22, v11, v18
2980; GFX9-O0-NEXT:    ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
2981; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v7
2982; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v22
2983; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v23
2984; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v20
2985; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v21
2986; GFX9-O0-NEXT:    v_sub_co_u32_e32 v6, vcc, v6, v19
2987; GFX9-O0-NEXT:    v_subb_co_u32_e32 v10, vcc, v10, v18, vcc
2988; GFX9-O0-NEXT:    v_subb_co_u32_e32 v4, vcc, v4, v11, vcc
2989; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v5, v7, vcc
2990; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2991; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2992; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
2993; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
2994; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2995; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
2996; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
2997; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v10
2998; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v8
2999; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 killed $vgpr8_vgpr9 killed $exec
3000; GFX9-O0-NEXT:    s_mov_b64 s[8:9], -1
3001; GFX9-O0-NEXT:    s_mov_b32 s5, s8
3002; GFX9-O0-NEXT:    s_mov_b32 s4, s9
3003; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v16
3004; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v17
3005; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s5
3006; GFX9-O0-NEXT:    v_add_co_u32_e32 v19, vcc, v11, v16
3007; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s4
3008; GFX9-O0-NEXT:    v_addc_co_u32_e32 v9, vcc, v9, v11, vcc
3009; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s5
3010; GFX9-O0-NEXT:    v_addc_co_u32_e32 v16, vcc, v10, v11, vcc
3011; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s4
3012; GFX9-O0-NEXT:    v_addc_co_u32_e32 v8, vcc, v8, v10, vcc
3013; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
3014; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
3015; GFX9-O0-NEXT:    ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
3016; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v9
3017; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
3018; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
3019; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
3020; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v8
3021; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v16
3022; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v17
3023; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v19
3024; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v20
3025; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v17
3026; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v20
3027; GFX9-O0-NEXT:    v_or_b32_e64 v18, v18, v21
3028; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v16
3029; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v19
3030; GFX9-O0-NEXT:    v_or_b32_e64 v16, v16, v17
3031; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
3032; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v18
3033; GFX9-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[16:17], v[12:13]
3034; GFX9-O0-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
3035; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v3
3036; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v2
3037; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
3038; GFX9-O0-NEXT:    s_nop 0
3039; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
3040; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v1
3041; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v0
3042; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
3043; GFX9-O0-NEXT:    s_nop 0
3044; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
3045; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v15
3046; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v14
3047; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
3048; GFX9-O0-NEXT:    s_nop 0
3049; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
3050; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v13
3051; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v12
3052; GFX9-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
3053; GFX9-O0-NEXT:    s_nop 0
3054; GFX9-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
3055; GFX9-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
3056; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 4
3057; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 5
3058; GFX9-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
3059; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 8
3060; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 9
3061; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
3062; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
3063; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
3064; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
3065; GFX9-O0-NEXT:    s_nop 0
3066; GFX9-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
3067; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
3068; GFX9-O0-NEXT:    s_nop 0
3069; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
3070; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
3071; GFX9-O0-NEXT:    s_nop 0
3072; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
3073; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
3074; GFX9-O0-NEXT:    s_nop 0
3075; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
3076; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
3077; GFX9-O0-NEXT:    s_nop 0
3078; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
3079; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
3080; GFX9-O0-NEXT:    s_nop 0
3081; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
3082; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
3083; GFX9-O0-NEXT:    s_nop 0
3084; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
3085; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
3086; GFX9-O0-NEXT:    s_nop 0
3087; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
3088; GFX9-O0-NEXT:    s_andn2_b64 exec, exec, s[4:5]
3089; GFX9-O0-NEXT:    s_cbranch_execnz .LBB1_6
3090; GFX9-O0-NEXT:    s_branch .LBB1_1
3091; GFX9-O0-NEXT:  .LBB1_7: ; %udiv-preheader
3092; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
3093; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
3094; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
3095; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
3096; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
3097; GFX9-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
3098; GFX9-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
3099; GFX9-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
3100; GFX9-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
3101; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
3102; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
3103; GFX9-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
3104; GFX9-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
3105; GFX9-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
3106; GFX9-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
3107; GFX9-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
3108; GFX9-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
3109; GFX9-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
3110; GFX9-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
3111; GFX9-O0-NEXT:    s_waitcnt vmcnt(9)
3112; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v10
3113; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
3114; GFX9-O0-NEXT:    v_lshrrev_b64 v[6:7], v4, v[20:21]
3115; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
3116; GFX9-O0-NEXT:    s_mov_b32 s6, 64
3117; GFX9-O0-NEXT:    v_sub_u32_e64 v12, s6, v4
3118; GFX9-O0-NEXT:    v_lshlrev_b64 v[22:23], v12, v[18:19]
3119; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v23
3120; GFX9-O0-NEXT:    v_or_b32_e64 v5, v5, v12
3121; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
3122; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v22
3123; GFX9-O0-NEXT:    v_or_b32_e64 v6, v6, v7
3124; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
3125; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
3126; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v7
3127; GFX9-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v4, s6
3128; GFX9-O0-NEXT:    v_sub_u32_e64 v5, v4, s6
3129; GFX9-O0-NEXT:    v_lshrrev_b64 v[22:23], v5, v[18:19]
3130; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v23
3131; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v12, s[4:5]
3132; GFX9-O0-NEXT:    s_mov_b32 s6, 0
3133; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v4, s6
3134; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v21
3135; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v12, s[6:7]
3136; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v6
3137; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v22
3138; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[4:5]
3139; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v20
3140; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[6:7]
3141; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
3142; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
3143; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
3144; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
3145; GFX9-O0-NEXT:    v_lshrrev_b64 v[4:5], v4, v[18:19]
3146; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v5
3147; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
3148; GFX9-O0-NEXT:    s_mov_b32 s8, s7
3149; GFX9-O0-NEXT:    v_mov_b32_e32 v12, s8
3150; GFX9-O0-NEXT:    v_cndmask_b32_e64 v12, v12, v15, s[4:5]
3151; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v4
3152; GFX9-O0-NEXT:    s_mov_b32 s8, s6
3153; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
3154; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[4:5]
3155; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
3156; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
3157; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
3158; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v12
3159; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v13
3160; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v14
3161; GFX9-O0-NEXT:    s_mov_b64 s[8:9], -1
3162; GFX9-O0-NEXT:    s_mov_b32 s5, s8
3163; GFX9-O0-NEXT:    s_mov_b32 s4, s9
3164; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v16
3165; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v17
3166; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s5
3167; GFX9-O0-NEXT:    v_add_co_u32_e32 v12, vcc, v12, v16
3168; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s4
3169; GFX9-O0-NEXT:    v_addc_co_u32_e32 v16, vcc, v15, v16, vcc
3170; GFX9-O0-NEXT:    v_mov_b32_e32 v15, s5
3171; GFX9-O0-NEXT:    v_addc_co_u32_e32 v14, vcc, v14, v15, vcc
3172; GFX9-O0-NEXT:    v_mov_b32_e32 v15, s4
3173; GFX9-O0-NEXT:    v_addc_co_u32_e32 v13, vcc, v13, v15, vcc
3174; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
3175; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
3176; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
3177; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v13
3178; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
3179; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
3180; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
3181; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v16
3182; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
3183; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
3184; GFX9-O0-NEXT:    s_nop 0
3185; GFX9-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
3186; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
3187; GFX9-O0-NEXT:    s_nop 0
3188; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
3189; GFX9-O0-NEXT:    s_mov_b64 s[4:5], s[6:7]
3190; GFX9-O0-NEXT:    v_mov_b32_e32 v15, s9
3191; GFX9-O0-NEXT:    v_mov_b32_e32 v14, s8
3192; GFX9-O0-NEXT:    v_mov_b32_e32 v13, s7
3193; GFX9-O0-NEXT:    v_mov_b32_e32 v12, s6
3194; GFX9-O0-NEXT:    v_writelane_b32 v30, s4, 8
3195; GFX9-O0-NEXT:    v_writelane_b32 v30, s5, 9
3196; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
3197; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
3198; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
3199; GFX9-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
3200; GFX9-O0-NEXT:    s_nop 0
3201; GFX9-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
3202; GFX9-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
3203; GFX9-O0-NEXT:    s_nop 0
3204; GFX9-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
3205; GFX9-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
3206; GFX9-O0-NEXT:    s_nop 0
3207; GFX9-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
3208; GFX9-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
3209; GFX9-O0-NEXT:    s_nop 0
3210; GFX9-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
3211; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
3212; GFX9-O0-NEXT:    s_nop 0
3213; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
3214; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
3215; GFX9-O0-NEXT:    s_nop 0
3216; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
3217; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
3218; GFX9-O0-NEXT:    s_nop 0
3219; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
3220; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
3221; GFX9-O0-NEXT:    s_nop 0
3222; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
3223; GFX9-O0-NEXT:    s_branch .LBB1_6
3224; GFX9-O0-NEXT:  .LBB1_8: ; %udiv-bb1
3225; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
3226; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
3227; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
3228; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
3229; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
3230; GFX9-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
3231; GFX9-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
3232; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3233; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3234; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3235; GFX9-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3236; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 1
3237; GFX9-O0-NEXT:    s_mov_b32 s5, s6
3238; GFX9-O0-NEXT:    s_waitcnt vmcnt(1)
3239; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v0
3240; GFX9-O0-NEXT:    s_mov_b32 s4, s7
3241; GFX9-O0-NEXT:    s_mov_b64 s[6:7], 0
3242; GFX9-O0-NEXT:    s_mov_b32 s8, s6
3243; GFX9-O0-NEXT:    s_mov_b32 s9, s7
3244; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v4
3245; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
3246; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s5
3247; GFX9-O0-NEXT:    v_add_co_u32_e32 v8, vcc, v3, v4
3248; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s4
3249; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
3250; GFX9-O0-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v4, vcc
3251; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
3252; GFX9-O0-NEXT:    v_addc_co_u32_e32 v0, vcc, v0, v4, vcc
3253; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s9
3254; GFX9-O0-NEXT:    v_addc_co_u32_e32 v2, vcc, v2, v4, vcc
3255; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
3256; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
3257; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
3258; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v1
3259; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
3260; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
3261; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
3262; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
3263; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v1
3264; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v0
3265; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
3266; GFX9-O0-NEXT:    s_nop 0
3267; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
3268; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v8
3269; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v9
3270; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
3271; GFX9-O0-NEXT:    s_nop 0
3272; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
3273; GFX9-O0-NEXT:    s_mov_b32 s4, 0x7f
3274; GFX9-O0-NEXT:    v_sub_u32_e64 v2, s4, v3
3275; GFX9-O0-NEXT:    v_lshlrev_b64 v[4:5], v2, v[10:11]
3276; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v5
3277; GFX9-O0-NEXT:    s_mov_b32 s4, 64
3278; GFX9-O0-NEXT:    v_sub_u32_e64 v13, s4, v2
3279; GFX9-O0-NEXT:    v_lshrrev_b64 v[13:14], v13, v[6:7]
3280; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v14
3281; GFX9-O0-NEXT:    v_or_b32_e64 v12, v12, v15
3282; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
3283; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v13
3284; GFX9-O0-NEXT:    v_or_b32_e64 v4, v4, v5
3285; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
3286; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v12
3287; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v5
3288; GFX9-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v2, s4
3289; GFX9-O0-NEXT:    s_mov_b32 s10, 63
3290; GFX9-O0-NEXT:    v_sub_u32_e64 v3, s10, v3
3291; GFX9-O0-NEXT:    v_lshlrev_b64 v[12:13], v3, v[6:7]
3292; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v13
3293; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v14, s[4:5]
3294; GFX9-O0-NEXT:    s_mov_b32 s10, 0
3295; GFX9-O0-NEXT:    v_cmp_eq_u32_e64 s[10:11], v2, s10
3296; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v11
3297; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v14, s[10:11]
3298; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v4
3299; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v12
3300; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[4:5]
3301; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
3302; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[10:11]
3303; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
3304; GFX9-O0-NEXT:    ; implicit-def: $sgpr10
3305; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
3306; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v3
3307; GFX9-O0-NEXT:    v_lshlrev_b64 v[6:7], v2, v[6:7]
3308; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v7
3309; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s9
3310; GFX9-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[4:5]
3311; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
3312; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s8
3313; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v3, v6, s[4:5]
3314; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
3315; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
3316; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
3317; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v2
3318; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
3319; GFX9-O0-NEXT:    s_nop 0
3320; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
3321; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
3322; GFX9-O0-NEXT:    s_nop 0
3323; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
3324; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
3325; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v9
3326; GFX9-O0-NEXT:    v_or_b32_e64 v2, v2, v3
3327; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
3328; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v8
3329; GFX9-O0-NEXT:    v_or_b32_e64 v0, v0, v1
3330; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
3331; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
3332; GFX9-O0-NEXT:    v_cmp_ne_u64_e64 s[4:5], v[0:1], s[6:7]
3333; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
3334; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s8
3335; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s9
3336; GFX9-O0-NEXT:    v_mov_b32_e32 v0, s6
3337; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s7
3338; GFX9-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
3339; GFX9-O0-NEXT:    s_nop 0
3340; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
3341; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
3342; GFX9-O0-NEXT:    s_nop 0
3343; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
3344; GFX9-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
3345; GFX9-O0-NEXT:    s_nop 0
3346; GFX9-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
3347; GFX9-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
3348; GFX9-O0-NEXT:    s_nop 0
3349; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
3350; GFX9-O0-NEXT:    s_mov_b64 s[6:7], exec
3351; GFX9-O0-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
3352; GFX9-O0-NEXT:    s_xor_b64 s[6:7], s[4:5], s[6:7]
3353; GFX9-O0-NEXT:    v_writelane_b32 v30, s6, 6
3354; GFX9-O0-NEXT:    v_writelane_b32 v30, s7, 7
3355; GFX9-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
3356; GFX9-O0-NEXT:    buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
3357; GFX9-O0-NEXT:    s_mov_b64 exec, s[18:19]
3358; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
3359; GFX9-O0-NEXT:    s_cbranch_execz .LBB1_5
3360; GFX9-O0-NEXT:    s_branch .LBB1_7
3361; GFX9-O0-NEXT:  .LBB1_9: ; %udiv-end
3362; GFX9-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
3363; GFX9-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
3364; GFX9-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
3365; GFX9-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
3366; GFX9-O0-NEXT:    s_mov_b32 s4, 32
3367; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
3368; GFX9-O0-NEXT:    v_lshrrev_b64 v[0:1], s4, v[6:7]
3369; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
3370; GFX9-O0-NEXT:    v_lshrrev_b64 v[2:3], s4, v[4:5]
3371; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
3372; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v6
3373; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v4
3374; GFX9-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
3375; GFX9-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
3376; GFX9-O0-NEXT:    s_mov_b64 exec, s[4:5]
3377; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
3378; GFX9-O0-NEXT:    s_setpc_b64 s[30:31]
3379;
; Expected llc output for v_udiv_i128_vv under the GFX9-G check prefix, which the
; RUN lines at the top of the file bind to -global-isel=1 -mcpu=gfx900 without -O0.
; The directives are autogenerated (utils/update_llc_test_checks.py); regenerate
; them rather than editing by hand. Unnumbered comment lines are annotations only.
3380; GFX9-G-LABEL: v_udiv_i128_vv:
3381; GFX9-G:       ; %bb.0: ; %_udiv-special-cases
3382; GFX9-G-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; Zero tests: vcc = (v[4:7] == 0) and s[4:5] = (v[0:3] == 0), each done by OR-ing
; the four 32-bit words down to one 64-bit pair. Presumably v[0:3] holds %lhs and
; v[4:7] holds %rhs; the define line is outside this excerpt, so confirm there.
3383; GFX9-G-NEXT:    v_or_b32_e32 v8, v4, v6
3384; GFX9-G-NEXT:    v_or_b32_e32 v9, v5, v7
3385; GFX9-G-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[8:9]
3386; GFX9-G-NEXT:    v_or_b32_e32 v8, v0, v2
3387; GFX9-G-NEXT:    v_or_b32_e32 v9, v1, v3
3388; GFX9-G-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[8:9]
; Per-word v_ffbh_u32 results are combined with +32 / +64 offsets, v_min_u32 and a
; select on "upper 64 bits == 0" into one 128-bit count per operand:
; v8 for v[4:7], v9 for v[0:3].
3389; GFX9-G-NEXT:    v_ffbh_u32_e32 v9, v4
3390; GFX9-G-NEXT:    v_ffbh_u32_e32 v8, v5
3391; GFX9-G-NEXT:    v_add_u32_e32 v9, 32, v9
3392; GFX9-G-NEXT:    v_ffbh_u32_e32 v10, v6
3393; GFX9-G-NEXT:    v_min_u32_e32 v8, v8, v9
3394; GFX9-G-NEXT:    v_ffbh_u32_e32 v9, v7
3395; GFX9-G-NEXT:    v_add_u32_e32 v10, 32, v10
3396; GFX9-G-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[6:7]
3397; GFX9-G-NEXT:    v_add_u32_e32 v8, 64, v8
3398; GFX9-G-NEXT:    v_min_u32_e32 v9, v9, v10
3399; GFX9-G-NEXT:    v_ffbh_u32_e32 v10, v0
3400; GFX9-G-NEXT:    v_cndmask_b32_e64 v8, v9, v8, s[6:7]
3401; GFX9-G-NEXT:    v_ffbh_u32_e32 v9, v1
3402; GFX9-G-NEXT:    v_add_u32_e32 v10, 32, v10
3403; GFX9-G-NEXT:    v_ffbh_u32_e32 v11, v2
3404; GFX9-G-NEXT:    v_min_u32_e32 v9, v9, v10
3405; GFX9-G-NEXT:    v_ffbh_u32_e32 v10, v3
3406; GFX9-G-NEXT:    v_add_u32_e32 v11, 32, v11
3407; GFX9-G-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[2:3]
3408; GFX9-G-NEXT:    v_add_u32_e32 v9, 64, v9
3409; GFX9-G-NEXT:    v_min_u32_e32 v10, v10, v11
3410; GFX9-G-NEXT:    v_cndmask_b32_e64 v9, v10, v9, s[6:7]
; v[12:15] = v8 - v9, widened to 128 bits by propagating the borrow through zeros.
; The difference is then compared (unsigned) against 0x7f in two 64-bit halves and
; OR-ed with the "either operand is zero" flag to form v18. s[8:9] is cleared here
; and reused further down as a zero source.
3411; GFX9-G-NEXT:    v_sub_co_u32_e64 v12, s[6:7], v8, v9
3412; GFX9-G-NEXT:    v_subb_co_u32_e64 v13, s[6:7], 0, 0, s[6:7]
3413; GFX9-G-NEXT:    v_mov_b32_e32 v8, 0x7f
3414; GFX9-G-NEXT:    v_subb_co_u32_e64 v14, s[6:7], 0, 0, s[6:7]
3415; GFX9-G-NEXT:    v_mov_b32_e32 v9, 0
3416; GFX9-G-NEXT:    v_subb_co_u32_e64 v15, s[6:7], 0, 0, s[6:7]
3417; GFX9-G-NEXT:    v_cmp_gt_u64_e64 s[6:7], v[12:13], v[8:9]
3418; GFX9-G-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
3419; GFX9-G-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[6:7]
3420; GFX9-G-NEXT:    v_cmp_lt_u64_e64 s[6:7], 0, v[14:15]
3421; GFX9-G-NEXT:    v_or_b32_e32 v17, v13, v15
3422; GFX9-G-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[6:7]
3423; GFX9-G-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[14:15]
3424; GFX9-G-NEXT:    s_mov_b64 s[8:9], 0
3425; GFX9-G-NEXT:    v_cndmask_b32_e64 v8, v9, v8, s[6:7]
3426; GFX9-G-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
3427; GFX9-G-NEXT:    v_or_b32_e32 v18, v9, v8
; Result registers v[10:11] (low half) and v[8:9] (high half) are preloaded with 0
; when bit 0 of v18 is set, otherwise with v[0:3]. A second test, difference == 0x7f
; (xor with 0x7f, OR of all words, compare with 0), is OR-ed into the flag; lanes
; with the combined flag set skip to .LBB1_6.
3428; GFX9-G-NEXT:    v_xor_b32_e32 v8, 0x7f, v12
3429; GFX9-G-NEXT:    v_or_b32_e32 v16, v8, v14
3430; GFX9-G-NEXT:    v_and_b32_e32 v8, 1, v18
3431; GFX9-G-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
3432; GFX9-G-NEXT:    v_cndmask_b32_e64 v10, v0, 0, vcc
3433; GFX9-G-NEXT:    v_cndmask_b32_e64 v11, v1, 0, vcc
3434; GFX9-G-NEXT:    v_cndmask_b32_e64 v8, v2, 0, vcc
3435; GFX9-G-NEXT:    v_cndmask_b32_e64 v9, v3, 0, vcc
3436; GFX9-G-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[16:17]
3437; GFX9-G-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
3438; GFX9-G-NEXT:    v_or_b32_e32 v16, v18, v16
3439; GFX9-G-NEXT:    v_and_b32_e32 v16, 1, v16
3440; GFX9-G-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v16
3441; GFX9-G-NEXT:    s_xor_b64 s[4:5], vcc, -1
3442; GFX9-G-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
3443; GFX9-G-NEXT:    s_cbranch_execz .LBB1_6
; udiv-bb1: v[18:21] = v[12:15] + 1, with s[4:5] = inverted carry-out of that add.
; v16 = 0x7f - v12 is used as a 128-bit left-shift amount applied to v[0:3]; the
; shift is assembled from 64-bit shifts plus selects on "v16 < 64" and "v16 == 0",
; leaving the low half in v[14:15] and the high half in v[8:9]. v[10:13] are
; zeroed from s[8:11]. Lanes with s[4:5] clear go straight to .LBB1_5.
3444; GFX9-G-NEXT:  ; %bb.1: ; %udiv-bb1
3445; GFX9-G-NEXT:    v_add_co_u32_e32 v18, vcc, 1, v12
3446; GFX9-G-NEXT:    v_addc_co_u32_e32 v19, vcc, 0, v13, vcc
3447; GFX9-G-NEXT:    v_addc_co_u32_e32 v20, vcc, 0, v14, vcc
3448; GFX9-G-NEXT:    v_addc_co_u32_e32 v21, vcc, 0, v15, vcc
3449; GFX9-G-NEXT:    s_xor_b64 s[4:5], vcc, -1
3450; GFX9-G-NEXT:    v_sub_co_u32_e32 v16, vcc, 0x7f, v12
3451; GFX9-G-NEXT:    v_sub_u32_e32 v8, 64, v16
3452; GFX9-G-NEXT:    v_lshrrev_b64 v[8:9], v8, v[0:1]
3453; GFX9-G-NEXT:    v_lshlrev_b64 v[10:11], v16, v[2:3]
3454; GFX9-G-NEXT:    v_add_u32_e32 v14, 0xffffffc0, v16
3455; GFX9-G-NEXT:    v_lshlrev_b64 v[12:13], v16, v[0:1]
3456; GFX9-G-NEXT:    v_or_b32_e32 v10, v8, v10
3457; GFX9-G-NEXT:    v_or_b32_e32 v11, v9, v11
3458; GFX9-G-NEXT:    v_lshlrev_b64 v[8:9], v14, v[0:1]
3459; GFX9-G-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v16
3460; GFX9-G-NEXT:    s_mov_b64 s[10:11], s[8:9]
3461; GFX9-G-NEXT:    v_cndmask_b32_e32 v14, 0, v12, vcc
3462; GFX9-G-NEXT:    v_cndmask_b32_e32 v15, 0, v13, vcc
3463; GFX9-G-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc
3464; GFX9-G-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
3465; GFX9-G-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v16
3466; GFX9-G-NEXT:    v_mov_b32_e32 v13, s11
3467; GFX9-G-NEXT:    v_cndmask_b32_e32 v8, v8, v2, vcc
3468; GFX9-G-NEXT:    v_cndmask_b32_e32 v9, v9, v3, vcc
3469; GFX9-G-NEXT:    v_mov_b32_e32 v11, s9
3470; GFX9-G-NEXT:    v_mov_b32_e32 v10, s8
3471; GFX9-G-NEXT:    v_mov_b32_e32 v12, s10
3472; GFX9-G-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
3473; GFX9-G-NEXT:    s_xor_b64 s[12:13], exec, s[8:9]
3474; GFX9-G-NEXT:    s_cbranch_execz .LBB1_5
; udiv-preheader: 128-bit logical right shift of v[0:3] by v18 (low half ends up in
; v[2:3], high half in v[16:17]), again built from 64-bit shifts plus selects on
; "v18 < 64" and "v18 == 0". v[22:25] = v[4:7] + (-1) with carry, i.e. v[4:7] - 1.
; s[8:9] is reset to 0 and accumulates the loop-exit mask; v[10:13] and v1 start at 0.
3475; GFX9-G-NEXT:  ; %bb.2: ; %udiv-preheader
3476; GFX9-G-NEXT:    v_sub_u32_e32 v12, 64, v18
3477; GFX9-G-NEXT:    v_add_u32_e32 v22, 0xffffffc0, v18
3478; GFX9-G-NEXT:    v_lshrrev_b64 v[10:11], v18, v[0:1]
3479; GFX9-G-NEXT:    v_lshlrev_b64 v[12:13], v12, v[2:3]
3480; GFX9-G-NEXT:    v_lshrrev_b64 v[16:17], v18, v[2:3]
3481; GFX9-G-NEXT:    v_lshrrev_b64 v[2:3], v22, v[2:3]
3482; GFX9-G-NEXT:    v_or_b32_e32 v10, v10, v12
3483; GFX9-G-NEXT:    v_or_b32_e32 v11, v11, v13
3484; GFX9-G-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v18
3485; GFX9-G-NEXT:    s_mov_b64 s[8:9], 0
3486; GFX9-G-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
3487; GFX9-G-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc
3488; GFX9-G-NEXT:    v_cndmask_b32_e32 v16, 0, v16, vcc
3489; GFX9-G-NEXT:    v_cndmask_b32_e32 v17, 0, v17, vcc
3490; GFX9-G-NEXT:    v_add_co_u32_e32 v22, vcc, -1, v4
3491; GFX9-G-NEXT:    v_addc_co_u32_e32 v23, vcc, -1, v5, vcc
3492; GFX9-G-NEXT:    s_mov_b64 s[10:11], s[8:9]
3493; GFX9-G-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v18
3494; GFX9-G-NEXT:    v_addc_co_u32_e32 v24, vcc, -1, v6, vcc
3495; GFX9-G-NEXT:    v_mov_b32_e32 v13, s11
3496; GFX9-G-NEXT:    v_cndmask_b32_e64 v2, v2, v0, s[4:5]
3497; GFX9-G-NEXT:    v_cndmask_b32_e64 v3, v3, v1, s[4:5]
3498; GFX9-G-NEXT:    v_addc_co_u32_e32 v25, vcc, -1, v7, vcc
3499; GFX9-G-NEXT:    v_mov_b32_e32 v1, 0
3500; GFX9-G-NEXT:    v_mov_b32_e32 v11, s9
3501; GFX9-G-NEXT:    v_mov_b32_e32 v10, s8
3502; GFX9-G-NEXT:    v_mov_b32_e32 v12, s10
; udiv-do-while, one pass per iteration:
;   - v[14:15] and v[8:9] are shifted left by one; v[10:11] is OR-ed into the low
;     pair and the old top bit of v15 is carried into v8.
;   - v[2:3] and v[16:17] are shifted left by one, taking in the old top bit of v9.
;   - The sign of (v[22:25] - that shifted value) is smeared into mask v12, and
;     (mask & v[4:7]) is subtracted back out of v[2:3] / v[16:17].
;   - mask & 1 becomes the next v[10:11].
;   - Counter v[18:21] is decremented. Lanes whose counter reached zero are added
;     to s[8:9] and removed from exec; the loop repeats while any lane is active.
; Presumably v[2:3]/v[16:17] is the running remainder and v[14:15]/v[8:9] the
; quotient being built (the block names say udiv); confirm against the IR expansion.
3503; GFX9-G-NEXT:  .LBB1_3: ; %udiv-do-while
3504; GFX9-G-NEXT:    ; =>This Inner Loop Header: Depth=1
3505; GFX9-G-NEXT:    v_lshlrev_b64 v[12:13], 1, v[14:15]
3506; GFX9-G-NEXT:    v_lshrrev_b32_e32 v0, 31, v15
3507; GFX9-G-NEXT:    v_or_b32_e32 v14, v10, v12
3508; GFX9-G-NEXT:    v_or_b32_e32 v15, v11, v13
3509; GFX9-G-NEXT:    v_lshlrev_b64 v[10:11], 1, v[16:17]
3510; GFX9-G-NEXT:    v_lshrrev_b32_e32 v12, 31, v3
3511; GFX9-G-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
3512; GFX9-G-NEXT:    v_or_b32_e32 v10, v10, v12
3513; GFX9-G-NEXT:    v_lshrrev_b32_e32 v12, 31, v9
3514; GFX9-G-NEXT:    v_or_b32_e32 v2, v2, v12
3515; GFX9-G-NEXT:    v_sub_co_u32_e32 v12, vcc, v22, v2
3516; GFX9-G-NEXT:    v_subb_co_u32_e32 v12, vcc, v23, v3, vcc
3517; GFX9-G-NEXT:    v_subb_co_u32_e32 v12, vcc, v24, v10, vcc
3518; GFX9-G-NEXT:    v_subb_co_u32_e32 v12, vcc, v25, v11, vcc
3519; GFX9-G-NEXT:    v_ashrrev_i32_e32 v12, 31, v12
3520; GFX9-G-NEXT:    v_and_b32_e32 v13, v12, v4
3521; GFX9-G-NEXT:    v_and_b32_e32 v16, v12, v5
3522; GFX9-G-NEXT:    v_sub_co_u32_e32 v2, vcc, v2, v13
3523; GFX9-G-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v16, vcc
3524; GFX9-G-NEXT:    v_and_b32_e32 v13, v12, v6
3525; GFX9-G-NEXT:    v_and_b32_e32 v17, v12, v7
3526; GFX9-G-NEXT:    v_subb_co_u32_e32 v16, vcc, v10, v13, vcc
3527; GFX9-G-NEXT:    v_subb_co_u32_e32 v17, vcc, v11, v17, vcc
3528; GFX9-G-NEXT:    v_add_co_u32_e32 v18, vcc, -1, v18
3529; GFX9-G-NEXT:    v_addc_co_u32_e32 v19, vcc, -1, v19, vcc
3530; GFX9-G-NEXT:    v_addc_co_u32_e32 v20, vcc, -1, v20, vcc
3531; GFX9-G-NEXT:    v_addc_co_u32_e32 v21, vcc, -1, v21, vcc
3532; GFX9-G-NEXT:    v_or_b32_e32 v10, v18, v20
3533; GFX9-G-NEXT:    v_or_b32_e32 v11, v19, v21
3534; GFX9-G-NEXT:    v_lshlrev_b64 v[8:9], 1, v[8:9]
3535; GFX9-G-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
3536; GFX9-G-NEXT:    v_or_b32_e32 v8, v8, v0
3537; GFX9-G-NEXT:    v_and_b32_e32 v0, 1, v12
3538; GFX9-G-NEXT:    v_mov_b32_e32 v11, v1
3539; GFX9-G-NEXT:    s_or_b64 s[8:9], vcc, s[8:9]
3540; GFX9-G-NEXT:    v_mov_b32_e32 v10, v0
3541; GFX9-G-NEXT:    s_andn2_b64 exec, exec, s[8:9]
3542; GFX9-G-NEXT:    s_cbranch_execnz .LBB1_3
; Flow: re-enable the lanes that left the loop (mask accumulated in s[8:9]).
3543; GFX9-G-NEXT:  ; %bb.4: ; %Flow
3544; GFX9-G-NEXT:    s_or_b64 exec, exec, s[8:9]
; Flow2: rejoin lanes that bypassed the preheader/loop (s[12:13]), then do a final
; shift-left-by-one of v[14:15] / v[8:9], merged with v[10:11]: low half of the
; result lands in v[10:11], high half in v[8:9].
3545; GFX9-G-NEXT:  .LBB1_5: ; %Flow2
3546; GFX9-G-NEXT:    s_or_b64 exec, exec, s[12:13]
3547; GFX9-G-NEXT:    v_lshlrev_b64 v[0:1], 1, v[14:15]
3548; GFX9-G-NEXT:    v_lshlrev_b64 v[8:9], 1, v[8:9]
3549; GFX9-G-NEXT:    v_lshrrev_b32_e32 v2, 31, v15
3550; GFX9-G-NEXT:    v_or_b32_e32 v8, v8, v2
3551; GFX9-G-NEXT:    v_or_b32_e32 v10, v10, v0
3552; GFX9-G-NEXT:    v_or_b32_e32 v11, v11, v1
; Flow3: restore the exec mask saved in s[6:7] at the entry-block branch, copy the
; result (v10, v11, v8, v9) into v0..v3 and return through s[30:31].
3553; GFX9-G-NEXT:  .LBB1_6: ; %Flow3
3554; GFX9-G-NEXT:    s_or_b64 exec, exec, s[6:7]
3555; GFX9-G-NEXT:    v_mov_b32_e32 v0, v10
3556; GFX9-G-NEXT:    v_mov_b32_e32 v1, v11
3557; GFX9-G-NEXT:    v_mov_b32_e32 v2, v8
3558; GFX9-G-NEXT:    v_mov_b32_e32 v3, v9
3559; GFX9-G-NEXT:    s_setpc_b64 s[30:31]
3560;
3561; GFX9-G-O0-LABEL: v_udiv_i128_vv:
3562; GFX9-G-O0:       ; %bb.0: ; %_udiv-special-cases
3563; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3564; GFX9-G-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
3565; GFX9-G-O0-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
3566; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[4:5]
3567; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v1
3568; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v2
3569; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v3
3570; GFX9-G-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
3571; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v10
3572; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v9
3573; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v8
3574; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
3575; GFX9-G-O0-NEXT:    s_nop 0
3576; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
3577; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
3578; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
3579; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v5
3580; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v6
3581; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v7
3582; GFX9-G-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec
3583; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v10
3584; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v9
3585; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v8
3586; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
3587; GFX9-G-O0-NEXT:    s_nop 0
3588; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
3589; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
3590; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
3591; GFX9-G-O0-NEXT:    s_mov_b64 s[4:5], 0
3592; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v5
3593; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v4
3594; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v7
3595; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v6
3596; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v9
3597; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v10
3598; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v12
3599; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v13
3600; GFX9-G-O0-NEXT:    v_or_b32_e64 v8, v8, v11
3601; GFX9-G-O0-NEXT:    v_or_b32_e64 v10, v9, v10
3602; GFX9-G-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
3603; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v10
3604; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s5
3605; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, s4
3606; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[6:7], v[8:9], v[10:11]
3607; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v1
3608; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v0
3609; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v3
3610; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v2
3611; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v9
3612; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v10
3613; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v12
3614; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v13
3615; GFX9-G-O0-NEXT:    v_or_b32_e64 v8, v8, v11
3616; GFX9-G-O0-NEXT:    v_or_b32_e64 v10, v9, v10
3617; GFX9-G-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
3618; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v10
3619; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s5
3620; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, s4
3621; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], v[10:11]
3622; GFX9-G-O0-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
3623; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v5
3624; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v4
3625; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v7
3626; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v6
3627; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s4
3628; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s5
3629; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[7:8], v[4:5]
3630; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v9
3631; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v10
3632; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v4, v4
3633; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v5, v5
3634; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, 32
3635; GFX9-G-O0-NEXT:    v_add_u32_e64 v5, v5, v6
3636; GFX9-G-O0-NEXT:    v_min_u32_e64 v4, v4, v5
3637; GFX9-G-O0-NEXT:    s_mov_b32 s10, 64
3638; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s10
3639; GFX9-G-O0-NEXT:    v_add_u32_e64 v5, v4, v5
3640; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v7
3641; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v8
3642; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v4, v4
3643; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v6, v6
3644; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, 32
3645; GFX9-G-O0-NEXT:    v_add_u32_e64 v6, v6, v7
3646; GFX9-G-O0-NEXT:    v_min_u32_e64 v4, v4, v6
3647; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[8:9]
3648; GFX9-G-O0-NEXT:    s_mov_b32 s14, 0
3649; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v1
3650; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v0
3651; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v3
3652; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v2
3653; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s5
3654; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s4
3655; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], v[5:6]
3656; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v10
3657; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v11
3658; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v5, v5
3659; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v6, v6
3660; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, 32
3661; GFX9-G-O0-NEXT:    v_add_u32_e64 v6, v6, v7
3662; GFX9-G-O0-NEXT:    v_min_u32_e64 v5, v5, v6
3663; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s10
3664; GFX9-G-O0-NEXT:    v_add_u32_e64 v6, v5, v6
3665; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v8
3666; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v9
3667; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v5, v5
3668; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v7, v7
3669; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, 32
3670; GFX9-G-O0-NEXT:    v_add_u32_e64 v7, v7, v8
3671; GFX9-G-O0-NEXT:    v_min_u32_e64 v5, v5, v7
3672; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[8:9]
3673; GFX9-G-O0-NEXT:    s_mov_b32 s13, 0
3674; GFX9-G-O0-NEXT:    s_mov_b32 s11, 0
3675; GFX9-G-O0-NEXT:    s_mov_b32 s12, 0
3676; GFX9-G-O0-NEXT:    s_mov_b32 s10, 0
3677; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v5, s[8:9], v4, v5
3678; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
3679; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s14
3680; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s14
3681; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v6, s[8:9], v4, v6, s[8:9]
3682; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
3683; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s13
3684; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s12
3685; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v8, s[8:9], v4, v7, s[8:9]
3686; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
3687; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s11
3688; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s10
3689; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v7, s[8:9], v4, v7, s[8:9]
3690; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
3691; GFX9-G-O0-NEXT:    s_mov_b64 s[8:9], 0x7f
3692; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v5
3693; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v6
3694; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v8
3695; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v7
3696; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, s9
3697; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, s8
3698; GFX9-G-O0-NEXT:    v_cmp_gt_u64_e64 s[12:13], v[11:12], v[13:14]
3699; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, s5
3700; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s4
3701; GFX9-G-O0-NEXT:    v_cmp_gt_u64_e64 s[10:11], v[9:10], v[11:12]
3702; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, s5
3703; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s4
3704; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[9:10], v[11:12]
3705; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, 1
3706; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, 0
3707; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v9, v4, v9, s[12:13]
3708; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, 1
3709; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, 0
3710; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[10:11]
3711; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v9, v4, v9, s[8:9]
3712; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, 1
3713; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, 0
3714; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[6:7]
3715; GFX9-G-O0-NEXT:    v_or_b32_e64 v4, v4, v9
3716; GFX9-G-O0-NEXT:    s_mov_b32 s7, 0x7f
3717; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0
3718; GFX9-G-O0-NEXT:    v_xor_b32_e64 v5, v5, s7
3719; GFX9-G-O0-NEXT:    v_xor_b32_e64 v6, v6, s6
3720; GFX9-G-O0-NEXT:    v_or_b32_e64 v5, v5, v8
3721; GFX9-G-O0-NEXT:    v_or_b32_e64 v7, v6, v7
3722; GFX9-G-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
3723; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v7
3724; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, s5
3725; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s4
3726; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[5:6], v[7:8]
3727; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v1
3728; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v0
3729; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v3
3730; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v2
3731; GFX9-G-O0-NEXT:    v_and_b32_e32 v0, 1, v4
3732; GFX9-G-O0-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v0
3733; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, 0
3734; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, 0
3735; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v5
3736; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v6
3737; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v0, v0, v3, s[6:7]
3738; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[6:7]
3739; GFX9-G-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
3740; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v2
3741; GFX9-G-O0-NEXT:    v_and_b32_e32 v2, 1, v4
3742; GFX9-G-O0-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v2
3743; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, 0
3744; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, 0
3745; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v7
3746; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v8
3747; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[6:7]
3748; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[6:7]
3749; GFX9-G-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
3750; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v2
3751; GFX9-G-O0-NEXT:    ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
3752; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v5
3753; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v6
3754; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, 1
3755; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, 0
3756; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[4:5]
3757; GFX9-G-O0-NEXT:    v_or_b32_e64 v4, v4, v5
3758; GFX9-G-O0-NEXT:    v_and_b32_e32 v4, 1, v4
3759; GFX9-G-O0-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v4
3760; GFX9-G-O0-NEXT:    s_mov_b64 s[6:7], -1
3761; GFX9-G-O0-NEXT:    s_xor_b64 s[6:7], s[4:5], s[6:7]
3762; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
3763; GFX9-G-O0-NEXT:    s_nop 0
3764; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
3765; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
3766; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
3767; GFX9-G-O0-NEXT:    s_mov_b64 s[4:5], exec
3768; GFX9-G-O0-NEXT:    ; implicit-def: $vgpr34 : SGPR spill to VGPR lane
3769; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s4, 0
3770; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s5, 1
3771; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
3772; GFX9-G-O0-NEXT:    buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
3773; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[18:19]
3774; GFX9-G-O0-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
3775; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[4:5]
3776; GFX9-G-O0-NEXT:    s_cbranch_execz .LBB1_3
3777; GFX9-G-O0-NEXT:    s_branch .LBB1_8
3778; GFX9-G-O0-NEXT:  .LBB1_1: ; %Flow
3779; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
3780; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
3781; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[18:19]
3782; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
3783; GFX9-G-O0-NEXT:    v_readlane_b32 s4, v34, 2
3784; GFX9-G-O0-NEXT:    v_readlane_b32 s5, v34, 3
3785; GFX9-G-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
3786; GFX9-G-O0-NEXT:  ; %bb.2: ; %Flow
3787; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
3788; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
3789; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
3790; GFX9-G-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
3791; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
3792; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
3793; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
3794; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
3795; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(7)
3796; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
3797; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(7)
3798; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
3799; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(7)
3800; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
3801; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(7)
3802; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
3803; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(7)
3804; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
3805; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(7)
3806; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
3807; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(7)
3808; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
3809; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(7)
3810; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
3811; GFX9-G-O0-NEXT:    s_branch .LBB1_5
3812; GFX9-G-O0-NEXT:  .LBB1_3: ; %Flow2
3813; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
3814; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
3815; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[18:19]
3816; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
3817; GFX9-G-O0-NEXT:    v_readlane_b32 s4, v34, 0
3818; GFX9-G-O0-NEXT:    v_readlane_b32 s5, v34, 1
3819; GFX9-G-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
3820; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
3821; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
3822; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
3823; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
3824; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(3)
3825; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
3826; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(3)
3827; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
3828; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(3)
3829; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
3830; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(3)
3831; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
3832; GFX9-G-O0-NEXT:    s_branch .LBB1_9
3833; GFX9-G-O0-NEXT:  .LBB1_4: ; %udiv-loop-exit
3834; GFX9-G-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
3835; GFX9-G-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
3836; GFX9-G-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
3837; GFX9-G-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
3838; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
3839; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
3840; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
3841; GFX9-G-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
3842; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(2)
3843; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v4
3844; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v5
3845; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
3846; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v6
3847; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v7
3848; GFX9-G-O0-NEXT:    s_mov_b32 s4, 1
3849; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s4
3850; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[10:11], v0, v[2:3]
3851; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s4
3852; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[0:1], v0, v[4:5]
3853; GFX9-G-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr2 killed $exec
3854; GFX9-G-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 killed $vgpr2_vgpr3 killed $exec
3855; GFX9-G-O0-NEXT:    s_mov_b32 s4, 31
3856; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s4
3857; GFX9-G-O0-NEXT:    v_lshrrev_b32_e64 v6, v2, v3
3858; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, 0
3859; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v0
3860; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v1
3861; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v14
3862; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v15
3863; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v16
3864; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v17
3865; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v12
3866; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v13
3867; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v10
3868; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v11
3869; GFX9-G-O0-NEXT:    v_or_b32_e64 v0, v0, v7
3870; GFX9-G-O0-NEXT:    v_or_b32_e64 v5, v1, v5
3871; GFX9-G-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
3872; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v5
3873; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v8
3874; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v9
3875; GFX9-G-O0-NEXT:    v_or3_b32 v4, v4, v6, v7
3876; GFX9-G-O0-NEXT:    v_or3_b32 v2, v2, v3, v5
3877; GFX9-G-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
3878; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v2
3879; GFX9-G-O0-NEXT:    ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
3880; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v4
3881; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v5
3882; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
3883; GFX9-G-O0-NEXT:    s_nop 0
3884; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
3885; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
3886; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
3887; GFX9-G-O0-NEXT:    s_branch .LBB1_3
3888; GFX9-G-O0-NEXT:  .LBB1_5: ; %Flow1
3889; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
3890; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
3891; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[18:19]
3892; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
3893; GFX9-G-O0-NEXT:    v_readlane_b32 s4, v34, 4
3894; GFX9-G-O0-NEXT:    v_readlane_b32 s5, v34, 5
3895; GFX9-G-O0-NEXT:    s_or_b64 exec, exec, s[4:5]
3896; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
3897; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
3898; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
3899; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
3900; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
3901; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
3902; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
3903; GFX9-G-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
3904; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(3)
3905; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
3906; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(3)
3907; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
3908; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(3)
3909; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
3910; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(3)
3911; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
3912; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
3913; GFX9-G-O0-NEXT:    s_nop 0
3914; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
3915; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
3916; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
3917; GFX9-G-O0-NEXT:    s_branch .LBB1_4
3918; GFX9-G-O0-NEXT:  .LBB1_6: ; %udiv-do-while
3919; GFX9-G-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
3920; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
3921; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
3922; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[18:19]
3923; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
3924; GFX9-G-O0-NEXT:    v_readlane_b32 s6, v34, 6
3925; GFX9-G-O0-NEXT:    v_readlane_b32 s7, v34, 7
3926; GFX9-G-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
3927; GFX9-G-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
3928; GFX9-G-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
3929; GFX9-G-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
3930; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
3931; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
3932; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
3933; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
3934; GFX9-G-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
3935; GFX9-G-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
3936; GFX9-G-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
3937; GFX9-G-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
3938; GFX9-G-O0-NEXT:    buffer_load_dword v30, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
3939; GFX9-G-O0-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
3940; GFX9-G-O0-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
3941; GFX9-G-O0-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
3942; GFX9-G-O0-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
3943; GFX9-G-O0-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
3944; GFX9-G-O0-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
3945; GFX9-G-O0-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
3946; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
3947; GFX9-G-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
3948; GFX9-G-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
3949; GFX9-G-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
3950; GFX9-G-O0-NEXT:    s_mov_b64 s[4:5], 0
3951; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(18)
3952; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v2
3953; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v3
3954; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(16)
3955; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v4
3956; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v5
3957; GFX9-G-O0-NEXT:    s_mov_b32 s8, 1
3958; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s8
3959; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[20:21], v2, v[0:1]
3960; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s8
3961; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[4:5], v2, v[3:4]
3962; GFX9-G-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr0 killed $exec
3963; GFX9-G-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec
3964; GFX9-G-O0-NEXT:    s_mov_b32 s9, 31
3965; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s9
3966; GFX9-G-O0-NEXT:    v_lshrrev_b32_e64 v3, v0, v1
3967; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, 0
3968; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v4
3969; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v5
3970; GFX9-G-O0-NEXT:    v_or_b32_e64 v7, v2, v3
3971; GFX9-G-O0-NEXT:    v_or_b32_e64 v5, v0, v1
3972; GFX9-G-O0-NEXT:    ; kill: def $vgpr0_vgpr1 killed $vgpr12_vgpr13 killed $exec
3973; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v14
3974; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v15
3975; GFX9-G-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr0 killed $exec
3976; GFX9-G-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec
3977; GFX9-G-O0-NEXT:    s_mov_b32 s9, 31
3978; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s9
3979; GFX9-G-O0-NEXT:    v_lshrrev_b32_e64 v3, v0, v1
3980; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, 0
3981; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v20
3982; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v21
3983; GFX9-G-O0-NEXT:    v_or_b32_e64 v4, v2, v3
3984; GFX9-G-O0-NEXT:    v_or_b32_e64 v9, v0, v1
3985; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v12
3986; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v13
3987; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v14
3988; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v15
3989; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s8
3990; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[22:23], v0, v[2:3]
3991; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s8
3992; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[0:1], v0, v[12:13]
3993; GFX9-G-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr2 killed $exec
3994; GFX9-G-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 killed $vgpr2_vgpr3 killed $exec
3995; GFX9-G-O0-NEXT:    s_mov_b32 s8, 31
3996; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s8
3997; GFX9-G-O0-NEXT:    v_lshrrev_b32_e64 v14, v2, v3
3998; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, 0
3999; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v0
4000; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v1
4001; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(10)
4002; GFX9-G-O0-NEXT:    v_mov_b32_e32 v28, v30
4003; GFX9-G-O0-NEXT:    v_mov_b32_e32 v29, v31
4004; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(8)
4005; GFX9-G-O0-NEXT:    v_mov_b32_e32 v20, v32
4006; GFX9-G-O0-NEXT:    v_mov_b32_e32 v21, v33
4007; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v28
4008; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v29
4009; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v22
4010; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v23
4011; GFX9-G-O0-NEXT:    v_or_b32_e64 v0, v0, v15
4012; GFX9-G-O0-NEXT:    v_or_b32_e64 v13, v1, v13
4013; GFX9-G-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
4014; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v13
4015; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v20
4016; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v21
4017; GFX9-G-O0-NEXT:    v_or3_b32 v12, v12, v14, v15
4018; GFX9-G-O0-NEXT:    v_or3_b32 v2, v2, v3, v13
4019; GFX9-G-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
4020; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v2
4021; GFX9-G-O0-NEXT:    ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
4022; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v12
4023; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v13
4024; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
4025; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v11, s[8:9], v11, v4
4026; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v10, s[8:9], v10, v9, s[8:9]
4027; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v8, s[8:9], v8, v7, s[8:9]
4028; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v10, s[8:9], v6, v5, s[8:9]
4029; GFX9-G-O0-NEXT:    s_mov_b32 s8, 31
4030; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s8
4031; GFX9-G-O0-NEXT:    v_ashrrev_i32_e64 v8, v6, v10
4032; GFX9-G-O0-NEXT:    s_mov_b32 s8, 31
4033; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s8
4034; GFX9-G-O0-NEXT:    v_ashrrev_i32_e64 v6, v6, v10
4035; GFX9-G-O0-NEXT:    s_mov_b32 s9, 1
4036; GFX9-G-O0-NEXT:    s_mov_b32 s8, 0
4037; GFX9-G-O0-NEXT:    v_and_b32_e64 v12, v8, s9
4038; GFX9-G-O0-NEXT:    v_and_b32_e64 v10, v8, s8
4039; GFX9-G-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
4040; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v10
4041; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s5
4042; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, s4
4043; GFX9-G-O0-NEXT:    ; kill: def $vgpr12_vgpr13 killed $vgpr12_vgpr13 def $vgpr12_vgpr13_vgpr14_vgpr15 killed $exec
4044; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v11
4045; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v10
4046; GFX9-G-O0-NEXT:    v_mov_b32_e32 v22, v24
4047; GFX9-G-O0-NEXT:    v_mov_b32_e32 v23, v25
4048; GFX9-G-O0-NEXT:    v_mov_b32_e32 v20, v26
4049; GFX9-G-O0-NEXT:    v_mov_b32_e32 v21, v27
4050; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v22
4051; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v23
4052; GFX9-G-O0-NEXT:    v_and_b32_e64 v11, v8, v11
4053; GFX9-G-O0-NEXT:    v_and_b32_e64 v10, v8, v10
4054; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v20
4055; GFX9-G-O0-NEXT:    v_mov_b32_e32 v20, v21
4056; GFX9-G-O0-NEXT:    v_and_b32_e64 v8, v6, v8
4057; GFX9-G-O0-NEXT:    v_and_b32_e64 v6, v6, v20
4058; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v4, s[8:9], v4, v11
4059; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v10, s[8:9], v9, v10, s[8:9]
4060; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v9, s[8:9], v7, v8, s[8:9]
4061; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v8, s[8:9], v5, v6, s[8:9]
4062; GFX9-G-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec
4063; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v10
4064; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v9
4065; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v8
4066; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v16
4067; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v17
4068; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v18
4069; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v19
4070; GFX9-G-O0-NEXT:    s_mov_b32 s8, -1
4071; GFX9-G-O0-NEXT:    s_mov_b32 s12, -1
4072; GFX9-G-O0-NEXT:    s_mov_b32 s11, -1
4073; GFX9-G-O0-NEXT:    s_mov_b32 s10, -1
4074; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, s8
4075; GFX9-G-O0-NEXT:    v_add_co_u32_e64 v16, s[8:9], v11, v16
4076; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s12
4077; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v17, s[8:9], v10, v11, s[8:9]
4078; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, s11
4079; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v19, s[8:9], v9, v10, s[8:9]
4080; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, s10
4081; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v18, s[8:9], v8, v9, s[8:9]
4082; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v16
4083; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v17
4084; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v19
4085; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v18
4086; GFX9-G-O0-NEXT:    v_or_b32_e64 v16, v16, v19
4087; GFX9-G-O0-NEXT:    v_or_b32_e64 v18, v17, v18
4088; GFX9-G-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
4089; GFX9-G-O0-NEXT:    v_mov_b32_e32 v17, v18
4090; GFX9-G-O0-NEXT:    v_mov_b32_e32 v19, s5
4091; GFX9-G-O0-NEXT:    v_mov_b32_e32 v18, s4
4092; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[16:17], v[18:19]
4093; GFX9-G-O0-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
4094; GFX9-G-O0-NEXT:    v_mov_b32_e32 v19, v3
4095; GFX9-G-O0-NEXT:    v_mov_b32_e32 v18, v2
4096; GFX9-G-O0-NEXT:    v_mov_b32_e32 v17, v1
4097; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, v0
4098; GFX9-G-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
4099; GFX9-G-O0-NEXT:    s_nop 0
4100; GFX9-G-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
4101; GFX9-G-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
4102; GFX9-G-O0-NEXT:    buffer_store_dword v19, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
4103; GFX9-G-O0-NEXT:    v_mov_b32_e32 v19, v15
4104; GFX9-G-O0-NEXT:    v_mov_b32_e32 v18, v14
4105; GFX9-G-O0-NEXT:    v_mov_b32_e32 v17, v13
4106; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, v12
4107; GFX9-G-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
4108; GFX9-G-O0-NEXT:    s_nop 0
4109; GFX9-G-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
4110; GFX9-G-O0-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
4111; GFX9-G-O0-NEXT:    buffer_store_dword v19, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
4112; GFX9-G-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
4113; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s6, 2
4114; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s7, 3
4115; GFX9-G-O0-NEXT:    s_mov_b64 s[6:7], s[4:5]
4116; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s6, 6
4117; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s7, 7
4118; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
4119; GFX9-G-O0-NEXT:    buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
4120; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[18:19]
4121; GFX9-G-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
4122; GFX9-G-O0-NEXT:    s_nop 0
4123; GFX9-G-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
4124; GFX9-G-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
4125; GFX9-G-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
4126; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
4127; GFX9-G-O0-NEXT:    s_nop 0
4128; GFX9-G-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
4129; GFX9-G-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
4130; GFX9-G-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
4131; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
4132; GFX9-G-O0-NEXT:    s_nop 0
4133; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
4134; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
4135; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
4136; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
4137; GFX9-G-O0-NEXT:    s_nop 0
4138; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
4139; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
4140; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
4141; GFX9-G-O0-NEXT:    s_andn2_b64 exec, exec, s[4:5]
4142; GFX9-G-O0-NEXT:    s_cbranch_execnz .LBB1_6
4143; GFX9-G-O0-NEXT:    s_branch .LBB1_1
4144; GFX9-G-O0-NEXT:  .LBB1_7: ; %udiv-preheader
4145; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
4146; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
4147; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[18:19]
4148; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
4149; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
4150; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
4151; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
4152; GFX9-G-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
4153; GFX9-G-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
4154; GFX9-G-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
4155; GFX9-G-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
4156; GFX9-G-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
4157; GFX9-G-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
4158; GFX9-G-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
4159; GFX9-G-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
4160; GFX9-G-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
4161; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
4162; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
4163; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
4164; GFX9-G-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
4165; GFX9-G-O0-NEXT:    s_mov_b32 s4, 64
4166; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(2)
4167; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v5
4168; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v4
4169; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
4170; GFX9-G-O0-NEXT:    v_mov_b32_e32 v21, v7
4171; GFX9-G-O0-NEXT:    v_mov_b32_e32 v20, v6
4172; GFX9-G-O0-NEXT:    s_mov_b32 s5, 0xffffffc0
4173; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s5
4174; GFX9-G-O0-NEXT:    v_add_u32_e64 v4, v12, v4
4175; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s4
4176; GFX9-G-O0-NEXT:    v_sub_u32_e64 v5, v5, v12
4177; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0
4178; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s4
4179; GFX9-G-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v12, v6
4180; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s6
4181; GFX9-G-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v12, v6
4182; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[6:7], v12, v[20:21]
4183; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[25:26], v12, v[14:15]
4184; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[23:24], v5, v[20:21]
4185; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v25
4186; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v26
4187; GFX9-G-O0-NEXT:    v_mov_b32_e32 v22, v23
4188; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v24
4189; GFX9-G-O0-NEXT:    v_or_b32_e64 v13, v13, v22
4190; GFX9-G-O0-NEXT:    v_or_b32_e64 v12, v5, v12
4191; GFX9-G-O0-NEXT:    s_mov_b64 s[8:9], 0
4192; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[20:21], v4, v[20:21]
4193; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v20
4194; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v21
4195; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v13, s[4:5]
4196; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v12, s[4:5]
4197; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v14
4198; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v15
4199; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v13, s[6:7]
4200; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v12, v5, v12, s[6:7]
4201; GFX9-G-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
4202; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v12
4203; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v6
4204; GFX9-G-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec
4205; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, 0
4206; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, 0
4207; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v12, v12, v13, s[4:5]
4208; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[4:5]
4209; GFX9-G-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
4210; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v6
4211; GFX9-G-O0-NEXT:    ; kill: def $vgpr4_vgpr5 killed $vgpr4_vgpr5 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec
4212; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v12
4213; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v13
4214; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v16
4215; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v17
4216; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v18
4217; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v19
4218; GFX9-G-O0-NEXT:    s_mov_b32 s4, -1
4219; GFX9-G-O0-NEXT:    s_mov_b32 s10, -1
4220; GFX9-G-O0-NEXT:    s_mov_b32 s7, -1
4221; GFX9-G-O0-NEXT:    s_mov_b32 s6, -1
4222; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, s4
4223; GFX9-G-O0-NEXT:    v_add_co_u32_e64 v15, s[4:5], v15, v16
4224; GFX9-G-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
4225; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, s10
4226; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v14, s[4:5], v14, v15, s[4:5]
4227; GFX9-G-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
4228; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, s7
4229; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v13, s[4:5], v13, v14, s[4:5]
4230; GFX9-G-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
4231; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, s6
4232; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v12, s[4:5], v12, v13, s[4:5]
4233; GFX9-G-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
4234; GFX9-G-O0-NEXT:    s_mov_b64 s[4:5], s[8:9]
4235; GFX9-G-O0-NEXT:    s_mov_b64 s[6:7], s[8:9]
4236; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s8, 6
4237; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s9, 7
4238; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
4239; GFX9-G-O0-NEXT:    buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
4240; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[18:19]
4241; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, s7
4242; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, s6
4243; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, s5
4244; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, s4
4245; GFX9-G-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
4246; GFX9-G-O0-NEXT:    s_nop 0
4247; GFX9-G-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
4248; GFX9-G-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
4249; GFX9-G-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
4250; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
4251; GFX9-G-O0-NEXT:    s_nop 0
4252; GFX9-G-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
4253; GFX9-G-O0-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
4254; GFX9-G-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
4255; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
4256; GFX9-G-O0-NEXT:    s_nop 0
4257; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
4258; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
4259; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
4260; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
4261; GFX9-G-O0-NEXT:    s_nop 0
4262; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
4263; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
4264; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
4265; GFX9-G-O0-NEXT:    s_branch .LBB1_6
4266; GFX9-G-O0-NEXT:  .LBB1_8: ; %udiv-bb1
4267; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
4268; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
4269; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[18:19]
4270; GFX9-G-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
4271; GFX9-G-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
4272; GFX9-G-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
4273; GFX9-G-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
4274; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
4275; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
4276; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
4277; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
4278; GFX9-G-O0-NEXT:    s_mov_b64 s[4:5], 0
4279; GFX9-G-O0-NEXT:    s_mov_b32 s6, 1
4280; GFX9-G-O0-NEXT:    s_mov_b32 s10, 0
4281; GFX9-G-O0-NEXT:    s_mov_b32 s9, 0
4282; GFX9-G-O0-NEXT:    s_mov_b32 s8, 0
4283; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s6
4284; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(3)
4285; GFX9-G-O0-NEXT:    v_add_co_u32_e64 v4, s[6:7], v1, v4
4286; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
4287; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s10
4288; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(1)
4289; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v5, s[6:7], v3, v5, s[6:7]
4290; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, s9
4291; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v7, s[6:7], v2, v3, s[6:7]
4292; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s8
4293; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v6, s[6:7], v0, v2, s[6:7]
4294; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v4
4295; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v5
4296; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v7
4297; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v6
4298; GFX9-G-O0-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
4299; GFX9-G-O0-NEXT:    s_nop 0
4300; GFX9-G-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
4301; GFX9-G-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
4302; GFX9-G-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
4303; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0x7f
4304; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s6
4305; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v3, s[6:7], v0, v1
4306; GFX9-G-O0-NEXT:    s_mov_b32 s7, 64
4307; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v9
4308; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v8
4309; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0xffffffc0
4310; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s6
4311; GFX9-G-O0-NEXT:    v_add_u32_e64 v2, v3, v0
4312; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s7
4313; GFX9-G-O0-NEXT:    v_sub_u32_e64 v8, v0, v3
4314; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0
4315; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s7
4316; GFX9-G-O0-NEXT:    v_cmp_lt_u32_e64 s[8:9], v3, v0
4317; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s6
4318; GFX9-G-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v3, v0
4319; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[0:1], v3, v[12:13]
4320; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[17:18], v8, v[12:13]
4321; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[15:16], v3, v[10:11]
4322; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v17
4323; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v18
4324; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v15
4325; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v16
4326; GFX9-G-O0-NEXT:    v_or_b32_e64 v9, v9, v14
4327; GFX9-G-O0-NEXT:    v_or_b32_e64 v3, v3, v8
4328; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[12:13], v2, v[12:13]
4329; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v0
4330; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v1
4331; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, 0
4332; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, 0
4333; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v0, v0, v8, s[8:9]
4334; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[8:9]
4335; GFX9-G-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
4336; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v2
4337; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v12
4338; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v13
4339; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v8, v8, v9, s[8:9]
4340; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[8:9]
4341; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v10
4342; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v11
4343; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v8, v8, v9, s[6:7]
4344; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[6:7]
4345; GFX9-G-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
4346; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v2
4347; GFX9-G-O0-NEXT:    ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
4348; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v8
4349; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v9
4350; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
4351; GFX9-G-O0-NEXT:    s_nop 0
4352; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
4353; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
4354; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
4355; GFX9-G-O0-NEXT:    s_mov_b64 s[8:9], s[4:5]
4356; GFX9-G-O0-NEXT:    s_mov_b64 s[10:11], s[4:5]
4357; GFX9-G-O0-NEXT:    v_or_b32_e64 v4, v4, v7
4358; GFX9-G-O0-NEXT:    v_or_b32_e64 v6, v5, v6
4359; GFX9-G-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
4360; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v6
4361; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s5
4362; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s4
4363; GFX9-G-O0-NEXT:    v_cmp_ne_u64_e64 s[4:5], v[4:5], v[6:7]
4364; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
4365; GFX9-G-O0-NEXT:    s_nop 0
4366; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
4367; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
4368; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
4369; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s8
4370; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s9
4371; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s10
4372; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, s11
4373; GFX9-G-O0-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
4374; GFX9-G-O0-NEXT:    s_nop 0
4375; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
4376; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
4377; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
4378; GFX9-G-O0-NEXT:    s_mov_b64 s[6:7], exec
4379; GFX9-G-O0-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
4380; GFX9-G-O0-NEXT:    s_xor_b64 s[6:7], s[4:5], s[6:7]
4381; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s6, 4
4382; GFX9-G-O0-NEXT:    v_writelane_b32 v34, s7, 5
4383; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[18:19], -1
4384; GFX9-G-O0-NEXT:    buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
4385; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[18:19]
4386; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[4:5]
4387; GFX9-G-O0-NEXT:    s_cbranch_execz .LBB1_5
4388; GFX9-G-O0-NEXT:    s_branch .LBB1_7
4389; GFX9-G-O0-NEXT:  .LBB1_9: ; %udiv-end
4390; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
4391; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
4392; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
4393; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
4394; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(3)
4395; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v3
4396; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(2)
4397; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v4
4398; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(1)
4399; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v5
4400; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
4401; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v6
4402; GFX9-G-O0-NEXT:    s_xor_saveexec_b64 s[4:5], -1
4403; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
4404; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[4:5]
4405; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
4406; GFX9-G-O0-NEXT:    s_setpc_b64 s[30:31]
4407  %div = udiv i128 %lhs, %rhs
4408  ret i128 %div
4409}
4410
; Signed i128 division of %lhs by the constant 8589934592 (2^33).
; The expected output for each of the four llc configurations at the top of
; this file is a single basic block with no branches: the sign of %lhs is
; expanded to a mask and logically shifted right by 31 to form a bias of
; 2^33 - 1 (zero for non-negative inputs), the bias is added to %lhs with an
; add / add-with-carry chain across all four dwords, and the sum is then
; arithmetically shifted right by 33 using 32- and 64-bit shift instructions.
; The assertion lines below are autogenerated (see the note on the first line
; of the file); regenerate them with utils/update_llc_test_checks.py instead
; of editing them by hand.
4411define i128 @v_sdiv_i128_v_pow2k(i128 %lhs) {
4412; GFX9-LABEL: v_sdiv_i128_v_pow2k:
4413; GFX9:       ; %bb.0:
4414; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4415; GFX9-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
4416; GFX9-NEXT:    v_mov_b32_e32 v5, v4
4417; GFX9-NEXT:    v_lshrrev_b64 v[4:5], 31, v[4:5]
4418; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v4
4419; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v1, v5, vcc
4420; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
4421; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
4422; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 31, v[2:3]
4423; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 1, v4
4424; GFX9-NEXT:    v_ashrrev_i64 v[2:3], 33, v[2:3]
4425; GFX9-NEXT:    v_or_b32_e32 v0, v4, v0
4426; GFX9-NEXT:    s_setpc_b64 s[30:31]
4427;
4428; GFX9-O0-LABEL: v_sdiv_i128_v_pow2k:
4429; GFX9-O0:       ; %bb.0:
4430; GFX9-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4431; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v2
4432; GFX9-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
4433; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v0
4434; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
4435; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
4436; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
4437; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
4438; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v3
4439; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
4440; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
4441; GFX9-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4442; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
4443; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v0
4444; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
4445; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
4446; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v1
4447; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
4448; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v4
4449; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v5
4450; GFX9-O0-NEXT:    s_mov_b32 s4, 63
4451; GFX9-O0-NEXT:    v_ashrrev_i64 v[4:5], s4, v[4:5]
4452; GFX9-O0-NEXT:    s_mov_b32 s5, 31
4453; GFX9-O0-NEXT:    v_lshrrev_b64 v[6:7], s5, v[4:5]
4454; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v6
4455; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v7
4456; GFX9-O0-NEXT:    s_mov_b64 s[8:9], 0
4457; GFX9-O0-NEXT:    s_mov_b32 s6, s8
4458; GFX9-O0-NEXT:    s_mov_b32 s4, s9
4459; GFX9-O0-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v5
4460; GFX9-O0-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v4, vcc
4461; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s6
4462; GFX9-O0-NEXT:    v_addc_co_u32_e32 v5, vcc, v2, v4, vcc
4463; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s4
4464; GFX9-O0-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
4465; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
4466; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
4467; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
4468; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v1
4469; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
4470; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
4471; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
4472; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
4473; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v3
4474; GFX9-O0-NEXT:    s_mov_b32 s4, 33
4475; GFX9-O0-NEXT:    v_lshrrev_b64 v[0:1], s4, v[0:1]
4476; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
4477; GFX9-O0-NEXT:    v_lshl_or_b32 v0, v2, s5, v0
4478; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v5
4479; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v6
4480; GFX9-O0-NEXT:    v_ashrrev_i64 v[3:4], s4, v[3:4]
4481; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v6
4482; GFX9-O0-NEXT:    s_mov_b32 s4, 1
4483; GFX9-O0-NEXT:    v_alignbit_b32 v1, v1, v2, s4
4484; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
4485; GFX9-O0-NEXT:    s_mov_b32 s4, 32
4486; GFX9-O0-NEXT:    v_lshrrev_b64 v[3:4], s4, v[3:4]
4487; GFX9-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec
4488; GFX9-O0-NEXT:    s_setpc_b64 s[30:31]
4489;
4490; GFX9-G-LABEL: v_sdiv_i128_v_pow2k:
4491; GFX9-G:       ; %bb.0:
4492; GFX9-G-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4493; GFX9-G-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
4494; GFX9-G-NEXT:    v_mov_b32_e32 v5, v4
4495; GFX9-G-NEXT:    v_lshrrev_b64 v[4:5], 31, v[4:5]
4496; GFX9-G-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v4
4497; GFX9-G-NEXT:    v_addc_co_u32_e32 v4, vcc, v1, v5, vcc
4498; GFX9-G-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
4499; GFX9-G-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v3, vcc
4500; GFX9-G-NEXT:    v_lshlrev_b64 v[0:1], 31, v[1:2]
4501; GFX9-G-NEXT:    v_lshrrev_b32_e32 v3, 1, v4
4502; GFX9-G-NEXT:    v_or_b32_e32 v0, v3, v0
4503; GFX9-G-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
4504; GFX9-G-NEXT:    v_ashrrev_i32_e32 v2, 1, v2
4505; GFX9-G-NEXT:    s_setpc_b64 s[30:31]
4506;
4507; GFX9-G-O0-LABEL: v_sdiv_i128_v_pow2k:
4508; GFX9-G-O0:       ; %bb.0:
4509; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4510; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v0
4511; GFX9-G-O0-NEXT:    s_mov_b32 s4, 31
4512; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s4
4513; GFX9-G-O0-NEXT:    v_ashrrev_i32_e64 v0, v0, v3
4514; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v0
4515; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v0
4516; GFX9-G-O0-NEXT:    s_mov_b32 s4, 31
4517; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s4
4518; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[6:7], v0, v[5:6]
4519; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v6
4520; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v7
4521; GFX9-G-O0-NEXT:    s_mov_b32 s8, 0
4522; GFX9-G-O0-NEXT:    s_mov_b32 s5, 0
4523; GFX9-G-O0-NEXT:    v_add_co_u32_e64 v4, s[6:7], v4, v5
4524; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v1, s[6:7], v1, v0, s[6:7]
4525; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s8
4526; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v5, s[6:7], v2, v0, s[6:7]
4527; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s5
4528; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v4, s[6:7], v3, v0, s[6:7]
4529; GFX9-G-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
4530; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v4
4531; GFX9-G-O0-NEXT:    s_mov_b32 s5, 1
4532; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s5
4533; GFX9-G-O0-NEXT:    v_lshrrev_b32_e64 v0, v0, v1
4534; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, 0
4535; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s4
4536; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[5:6], v2, v[5:6]
4537; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v5
4538; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v6
4539; GFX9-G-O0-NEXT:    v_or_b32_e64 v0, v0, v3
4540; GFX9-G-O0-NEXT:    v_or_b32_e64 v1, v1, v2
4541; GFX9-G-O0-NEXT:    s_mov_b32 s4, 31
4542; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s4
4543; GFX9-G-O0-NEXT:    v_ashrrev_i32_e64 v3, v2, v4
4544; GFX9-G-O0-NEXT:    s_mov_b32 s4, 1
4545; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s4
4546; GFX9-G-O0-NEXT:    v_ashrrev_i32_e64 v2, v2, v4
4547; GFX9-G-O0-NEXT:    s_setpc_b64 s[30:31]
4548  %div = sdiv i128 %lhs, 8589934592
4549  ret i128 %div
4550}
4551
; Unsigned i128 division of %lhs by the constant 8589934592 (2^33).
; The expected output for each of the four llc configurations at the top of
; this file is a single basic block with no branches that performs a logical
; shift right by 33, assembled from 32- and 64-bit shift, or and alignbit
; instructions; v3 is written with zero before returning.
; The assertion lines below are autogenerated (see the note on the first line
; of the file); regenerate them with utils/update_llc_test_checks.py instead
; of editing them by hand.
4552define i128 @v_udiv_i128_v_pow2k(i128 %lhs) {
4553; GFX9-LABEL: v_udiv_i128_v_pow2k:
4554; GFX9:       ; %bb.0:
4555; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4556; GFX9-NEXT:    v_mov_b32_e32 v4, v1
4557; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 31, v[2:3]
4558; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 1, v4
4559; GFX9-NEXT:    v_or_b32_e32 v0, v2, v0
4560; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 1, v3
4561; GFX9-NEXT:    v_mov_b32_e32 v3, 0
4562; GFX9-NEXT:    s_setpc_b64 s[30:31]
4563;
4564; GFX9-O0-LABEL: v_udiv_i128_v_pow2k:
4565; GFX9-O0:       ; %bb.0:
4566; GFX9-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4567; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v2
4568; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v1
4569; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
4570; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
4571; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
4572; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
4573; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
4574; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
4575; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
4576; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v3
4577; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
4578; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
4579; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v5
4580; GFX9-O0-NEXT:    s_mov_b32 s4, 33
4581; GFX9-O0-NEXT:    v_lshrrev_b64 v[0:1], s4, v[0:1]
4582; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
4583; GFX9-O0-NEXT:    s_mov_b32 s5, 31
4584; GFX9-O0-NEXT:    v_lshl_or_b32 v0, v4, s5, v0
4585; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v5
4586; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v6
4587; GFX9-O0-NEXT:    v_lshrrev_b64 v[2:3], s4, v[1:2]
4588; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v6
4589; GFX9-O0-NEXT:    s_mov_b32 s4, 1
4590; GFX9-O0-NEXT:    v_alignbit_b32 v1, v1, v4, s4
4591; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 killed $vgpr2_vgpr3 killed $exec
4592; GFX9-O0-NEXT:    v_mov_b32_e32 v3, 0
4593; GFX9-O0-NEXT:    s_setpc_b64 s[30:31]
4594;
4595; GFX9-G-LABEL: v_udiv_i128_v_pow2k:
4596; GFX9-G:       ; %bb.0:
4597; GFX9-G-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4598; GFX9-G-NEXT:    v_mov_b32_e32 v4, v1
4599; GFX9-G-NEXT:    v_lshlrev_b64 v[0:1], 31, v[2:3]
4600; GFX9-G-NEXT:    v_lshrrev_b32_e32 v2, 1, v4
4601; GFX9-G-NEXT:    v_or_b32_e32 v0, v2, v0
4602; GFX9-G-NEXT:    v_lshrrev_b32_e32 v2, 1, v3
4603; GFX9-G-NEXT:    v_mov_b32_e32 v3, 0
4604; GFX9-G-NEXT:    s_setpc_b64 s[30:31]
4605;
4606; GFX9-G-O0-LABEL: v_udiv_i128_v_pow2k:
4607; GFX9-G-O0:       ; %bb.0:
4608; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4609; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v2
4610; GFX9-G-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
4611; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v3
4612; GFX9-G-O0-NEXT:    s_mov_b32 s4, 1
4613; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s4
4614; GFX9-G-O0-NEXT:    v_lshrrev_b32_e64 v0, v0, v1
4615; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, 0
4616; GFX9-G-O0-NEXT:    s_mov_b32 s4, 31
4617; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s4
4618; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[5:6], v2, v[4:5]
4619; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v5
4620; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v6
4621; GFX9-G-O0-NEXT:    v_or_b32_e64 v0, v0, v4
4622; GFX9-G-O0-NEXT:    v_or_b32_e64 v1, v1, v2
4623; GFX9-G-O0-NEXT:    s_mov_b32 s4, 1
4624; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s4
4625; GFX9-G-O0-NEXT:    v_lshrrev_b32_e64 v2, v2, v3
4626; GFX9-G-O0-NEXT:    s_mov_b32 s4, 0
4627; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, s4
4628; GFX9-G-O0-NEXT:    s_setpc_b64 s[30:31]
4629  %div = udiv i128 %lhs, 8589934592
4630  ret i128 %div
4631}
4632