Lines Matching full:g
5 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GFX9-G %s
6 ; RUN: llc -O0 -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GFX9-G-O0 %s
1224 ; GFX9-G-LABEL: v_sdiv_i128_vv:
1225 ; GFX9-G: ; %bb.0: ; %_udiv-special-cases
1226 ; GFX9-G-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1227 ; GFX9-G-NEXT: v_ashrrev_i32_e32 v16, 31, v3
1228 ; GFX9-G-NEXT: v_xor_b32_e32 v0, v16, v0
1229 ; GFX9-G-NEXT: v_xor_b32_e32 v1, v16, v1
1230 ; GFX9-G-NEXT: v_sub_co_u32_e32 v8, vcc, v0, v16
1231 ; GFX9-G-NEXT: v_xor_b32_e32 v2, v16, v2
1232 ; GFX9-G-NEXT: v_subb_co_u32_e32 v9, vcc, v1, v16, vcc
1233 ; GFX9-G-NEXT: v_ashrrev_i32_e32 v17, 31, v7
1234 ; GFX9-G-NEXT: v_xor_b32_e32 v3, v16, v3
1235 ; GFX9-G-NEXT: v_subb_co_u32_e32 v10, vcc, v2, v16, vcc
1236 ; GFX9-G-NEXT: v_subb_co_u32_e32 v11, vcc, v3, v16, vcc
1237 ; GFX9-G-NEXT: v_xor_b32_e32 v0, v17, v4
1238 ; GFX9-G-NEXT: v_xor_b32_e32 v1, v17, v5
1239 ; GFX9-G-NEXT: v_sub_co_u32_e32 v18, vcc, v0, v17
1240 ; GFX9-G-NEXT: v_xor_b32_e32 v2, v17, v6
1241 ; GFX9-G-NEXT: v_subb_co_u32_e32 v19, vcc, v1, v17, vcc
1242 ; GFX9-G-NEXT: v_xor_b32_e32 v3, v17, v7
1243 ; GFX9-G-NEXT: v_subb_co_u32_e32 v4, vcc, v2, v17, vcc
1244 ; GFX9-G-NEXT: v_subb_co_u32_e32 v5, vcc, v3, v17, vcc
1245 ; GFX9-G-NEXT: v_or_b32_e32 v0, v18, v4
1246 ; GFX9-G-NEXT: v_or_b32_e32 v1, v19, v5
1247 ; GFX9-G-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
1248 ; GFX9-G-NEXT: v_or_b32_e32 v0, v8, v10
1249 ; GFX9-G-NEXT: v_or_b32_e32 v1, v9, v11
1250 ; GFX9-G-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
1251 ; GFX9-G-NEXT: v_ffbh_u32_e32 v1, v18
1252 ; GFX9-G-NEXT: v_ffbh_u32_e32 v0, v19
1253 ; GFX9-G-NEXT: v_add_u32_e32 v1, 32, v1
1254 ; GFX9-G-NEXT: v_ffbh_u32_e32 v2, v4
1255 ; GFX9-G-NEXT: v_min_u32_e32 v0, v0, v1
1256 ; GFX9-G-NEXT: v_ffbh_u32_e32 v1, v5
1257 ; GFX9-G-NEXT: v_add_u32_e32 v2, 32, v2
1258 ; GFX9-G-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[4:5]
1259 ; GFX9-G-NEXT: v_add_u32_e32 v0, 64, v0
1260 ; GFX9-G-NEXT: v_min_u32_e32 v1, v1, v2
1261 ; GFX9-G-NEXT: v_ffbh_u32_e32 v2, v8
1262 ; GFX9-G-NEXT: v_cndmask_b32_e64 v0, v1, v0, s[6:7]
1263 ; GFX9-G-NEXT: v_ffbh_u32_e32 v1, v9
1264 ; GFX9-G-NEXT: v_add_u32_e32 v2, 32, v2
1265 ; GFX9-G-NEXT: v_ffbh_u32_e32 v3, v10
1266 ; GFX9-G-NEXT: v_min_u32_e32 v1, v1, v2
1267 ; GFX9-G-NEXT: v_ffbh_u32_e32 v2, v11
1268 ; GFX9-G-NEXT: v_add_u32_e32 v3, 32, v3
1269 ; GFX9-G-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[10:11]
1270 ; GFX9-G-NEXT: v_add_u32_e32 v1, 64, v1
1271 ; GFX9-G-NEXT: v_min_u32_e32 v2, v2, v3
1272 ; GFX9-G-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[6:7]
1273 ; GFX9-G-NEXT: v_sub_co_u32_e64 v0, s[6:7], v0, v1
1274 ; GFX9-G-NEXT: v_subb_co_u32_e64 v1, s[6:7], 0, 0, s[6:7]
1275 ; GFX9-G-NEXT: v_mov_b32_e32 v6, 0x7f
1276 ; GFX9-G-NEXT: v_subb_co_u32_e64 v2, s[6:7], 0, 0, s[6:7]
1277 ; GFX9-G-NEXT: v_mov_b32_e32 v7, 0
1278 ; GFX9-G-NEXT: v_subb_co_u32_e64 v3, s[6:7], 0, 0, s[6:7]
1279 ; GFX9-G-NEXT: v_cmp_gt_u64_e64 s[6:7], v[0:1], v[6:7]
1280 ; GFX9-G-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1281 ; GFX9-G-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[6:7]
1282 ; GFX9-G-NEXT: v_cmp_lt_u64_e64 s[6:7], 0, v[2:3]
1283 ; GFX9-G-NEXT: v_or_b32_e32 v15, v1, v3
1284 ; GFX9-G-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[6:7]
1285 ; GFX9-G-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[2:3]
1286 ; GFX9-G-NEXT: s_mov_b64 s[8:9], 0
1287 ; GFX9-G-NEXT: v_cndmask_b32_e64 v6, v7, v6, s[6:7]
1288 ; GFX9-G-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5]
1289 ; GFX9-G-NEXT: v_or_b32_e32 v20, v7, v6
1290 ; GFX9-G-NEXT: v_xor_b32_e32 v6, 0x7f, v0
1291 ; GFX9-G-NEXT: v_or_b32_e32 v14, v6, v2
1292 ; GFX9-G-NEXT: v_and_b32_e32 v6, 1, v20
1293 ; GFX9-G-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
1294 ; GFX9-G-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
1295 ; GFX9-G-NEXT: v_cndmask_b32_e64 v7, v9, 0, vcc
1296 ; GFX9-G-NEXT: v_cndmask_b32_e64 v12, v10, 0, vcc
1297 ; GFX9-G-NEXT: v_cndmask_b32_e64 v13, v11, 0, vcc
1298 ; GFX9-G-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15]
1299 ; GFX9-G-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
1300 ; GFX9-G-NEXT: v_or_b32_e32 v14, v20, v14
1301 ; GFX9-G-NEXT: v_and_b32_e32 v14, 1, v14
1302 ; GFX9-G-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14
1303 ; GFX9-G-NEXT: s_xor_b64 s[4:5], vcc, -1
1304 ; GFX9-G-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
1305 ; GFX9-G-NEXT: s_cbranch_execz .LBB0_6
1306 ; GFX9-G-NEXT: ; %bb.1: ; %udiv-bb1
1307 ; GFX9-G-NEXT: v_add_co_u32_e32 v20, vcc, 1, v0
1308 ; GFX9-G-NEXT: v_addc_co_u32_e32 v21, vcc, 0, v1, vcc
1309 ; GFX9-G-NEXT: v_addc_co_u32_e32 v22, vcc, 0, v2, vcc
1310 ; GFX9-G-NEXT: v_addc_co_u32_e32 v23, vcc, 0, v3, vcc
1311 ; GFX9-G-NEXT: s_xor_b64 s[4:5], vcc, -1
1312 ; GFX9-G-NEXT: v_sub_co_u32_e32 v12, vcc, 0x7f, v0
1313 ; GFX9-G-NEXT: v_sub_u32_e32 v0, 64, v12
1314 ; GFX9-G-NEXT: v_lshrrev_b64 v[0:1], v0, v[8:9]
1315 ; GFX9-G-NEXT: v_lshlrev_b64 v[2:3], v12, v[10:11]
1316 ; GFX9-G-NEXT: v_add_u32_e32 v13, 0xffffffc0, v12
1317 ; GFX9-G-NEXT: v_lshlrev_b64 v[6:7], v12, v[8:9]
1318 ; GFX9-G-NEXT: v_or_b32_e32 v2, v0, v2
1319 ; GFX9-G-NEXT: v_or_b32_e32 v3, v1, v3
1320 ; GFX9-G-NEXT: v_lshlrev_b64 v[0:1], v13, v[8:9]
1321 ; GFX9-G-NEXT: v_cmp_gt_u32_e32 vcc, 64, v12
1322 ; GFX9-G-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc
1323 ; GFX9-G-NEXT: v_cndmask_b32_e32 v7, 0, v7, vcc
1324 ; GFX9-G-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1325 ; GFX9-G-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
1326 ; GFX9-G-NEXT: v_cmp_eq_u32_e32 vcc, 0, v12
1327 ; GFX9-G-NEXT: v_cndmask_b32_e32 v12, v0, v10, vcc
1328 ; GFX9-G-NEXT: v_cndmask_b32_e32 v13, v1, v11, vcc
1329 ; GFX9-G-NEXT: s_mov_b64 s[10:11], s[8:9]
1330 ; GFX9-G-NEXT: v_mov_b32_e32 v0, s8
1331 ; GFX9-G-NEXT: v_mov_b32_e32 v1, s9
1332 ; GFX9-G-NEXT: v_mov_b32_e32 v2, s10
1333 ; GFX9-G-NEXT: v_mov_b32_e32 v3, s11
1334 ; GFX9-G-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
1335 ; GFX9-G-NEXT: s_xor_b64 s[12:13], exec, s[8:9]
1336 ; GFX9-G-NEXT: s_cbranch_execz .LBB0_5
1337 ; GFX9-G-NEXT: ; %bb.2: ; %udiv-preheader
1338 ; GFX9-G-NEXT: v_sub_u32_e32 v2, 64, v20
1339 ; GFX9-G-NEXT: v_lshrrev_b64 v[0:1], v20, v[8:9]
1340 ; GFX9-G-NEXT: v_lshlrev_b64 v[2:3], v2, v[10:11]
1341 ; GFX9-G-NEXT: v_add_u32_e32 v24, 0xffffffc0, v20
1342 ; GFX9-G-NEXT: v_lshrrev_b64 v[14:15], v20, v[10:11]
1343 ; GFX9-G-NEXT: v_or_b32_e32 v2, v0, v2
1344 ; GFX9-G-NEXT: v_or_b32_e32 v3, v1, v3
1345 ; GFX9-G-NEXT: v_lshrrev_b64 v[0:1], v24, v[10:11]
1346 ; GFX9-G-NEXT: v_cmp_gt_u32_e32 vcc, 64, v20
1347 ; GFX9-G-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1348 ; GFX9-G-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
1349 ; GFX9-G-NEXT: v_cndmask_b32_e32 v14, 0, v14, vcc
1350 ; GFX9-G-NEXT: v_cndmask_b32_e32 v15, 0, v15, vcc
1351 ; GFX9-G-NEXT: v_add_co_u32_e32 v24, vcc, -1, v18
1352 ; GFX9-G-NEXT: s_mov_b64 s[8:9], 0
1353 ; GFX9-G-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v20
1354 ; GFX9-G-NEXT: v_addc_co_u32_e32 v25, vcc, -1, v19, vcc
1355 ; GFX9-G-NEXT: v_cndmask_b32_e64 v10, v0, v8, s[4:5]
1356 ; GFX9-G-NEXT: v_cndmask_b32_e64 v11, v1, v9, s[4:5]
1357 ; GFX9-G-NEXT: v_addc_co_u32_e32 v26, vcc, -1, v4, vcc
1358 ; GFX9-G-NEXT: s_mov_b64 s[10:11], s[8:9]
1359 ; GFX9-G-NEXT: v_mov_b32_e32 v0, s8
1360 ; GFX9-G-NEXT: v_addc_co_u32_e32 v27, vcc, -1, v5, vcc
1361 ; GFX9-G-NEXT: v_mov_b32_e32 v9, 0
1362 ; GFX9-G-NEXT: v_mov_b32_e32 v1, s9
1363 ; GFX9-G-NEXT: v_mov_b32_e32 v2, s10
1364 ; GFX9-G-NEXT: v_mov_b32_e32 v3, s11
1365 ; GFX9-G-NEXT: .LBB0_3: ; %udiv-do-while
1366 ; GFX9-G-NEXT: ; =>This Inner Loop Header: Depth=1
1367 ; GFX9-G-NEXT: v_lshlrev_b64 v[2:3], 1, v[6:7]
1368 ; GFX9-G-NEXT: v_lshrrev_b32_e32 v8, 31, v7
1369 ; GFX9-G-NEXT: v_or_b32_e32 v6, v0, v2
1370 ; GFX9-G-NEXT: v_or_b32_e32 v7, v1, v3
1371 ; GFX9-G-NEXT: v_lshlrev_b64 v[2:3], 1, v[10:11]
1372 ; GFX9-G-NEXT: v_lshrrev_b32_e32 v10, 31, v13
1373 ; GFX9-G-NEXT: v_lshlrev_b64 v[0:1], 1, v[14:15]
1374 ; GFX9-G-NEXT: v_or_b32_e32 v2, v2, v10
1375 ; GFX9-G-NEXT: v_lshrrev_b32_e32 v14, 31, v11
1376 ; GFX9-G-NEXT: v_sub_co_u32_e32 v10, vcc, v24, v2
1377 ; GFX9-G-NEXT: v_or_b32_e32 v0, v0, v14
1378 ; GFX9-G-NEXT: v_subb_co_u32_e32 v10, vcc, v25, v3, vcc
1379 ; GFX9-G-NEXT: v_subb_co_u32_e32 v10, vcc, v26, v0, vcc
1380 ; GFX9-G-NEXT: v_subb_co_u32_e32 v10, vcc, v27, v1, vcc
1381 ; GFX9-G-NEXT: v_ashrrev_i32_e32 v28, 31, v10
1382 ; GFX9-G-NEXT: v_and_b32_e32 v10, v28, v18
1383 ; GFX9-G-NEXT: v_and_b32_e32 v11, v28, v19
1384 ; GFX9-G-NEXT: v_sub_co_u32_e32 v10, vcc, v2, v10
1385 ; GFX9-G-NEXT: v_subb_co_u32_e32 v11, vcc, v3, v11, vcc
1386 ; GFX9-G-NEXT: v_and_b32_e32 v2, v28, v4
1387 ; GFX9-G-NEXT: v_and_b32_e32 v3, v28, v5
1388 ; GFX9-G-NEXT: v_subb_co_u32_e32 v14, vcc, v0, v2, vcc
1389 ; GFX9-G-NEXT: v_subb_co_u32_e32 v15, vcc, v1, v3, vcc
1390 ; GFX9-G-NEXT: v_add_co_u32_e32 v20, vcc, -1, v20
1391 ; GFX9-G-NEXT: v_addc_co_u32_e32 v21, vcc, -1, v21, vcc
1392 ; GFX9-G-NEXT: v_addc_co_u32_e32 v22, vcc, -1, v22, vcc
1393 ; GFX9-G-NEXT: v_addc_co_u32_e32 v23, vcc, -1, v23, vcc
1394 ; GFX9-G-NEXT: v_lshlrev_b64 v[12:13], 1, v[12:13]
1395 ; GFX9-G-NEXT: v_or_b32_e32 v0, v20, v22
1396 ; GFX9-G-NEXT: v_or_b32_e32 v1, v21, v23
1397 ; GFX9-G-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
1398 ; GFX9-G-NEXT: v_or_b32_e32 v12, v12, v8
1399 ; GFX9-G-NEXT: v_and_b32_e32 v8, 1, v28
1400 ; GFX9-G-NEXT: v_mov_b32_e32 v0, v8
1401 ; GFX9-G-NEXT: s_or_b64 s[8:9], vcc, s[8:9]
1402 ; GFX9-G-NEXT: v_mov_b32_e32 v1, v9
1403 ; GFX9-G-NEXT: s_andn2_b64 exec, exec, s[8:9]
1404 ; GFX9-G-NEXT: s_cbranch_execnz .LBB0_3
1405 ; GFX9-G-NEXT: ; %bb.4: ; %Flow
1406 ; GFX9-G-NEXT: s_or_b64 exec, exec, s[8:9]
1407 ; GFX9-G-NEXT: .LBB0_5: ; %Flow2
1408 ; GFX9-G-NEXT: s_or_b64 exec, exec, s[12:13]
1409 ; GFX9-G-NEXT: v_lshlrev_b64 v[2:3], 1, v[6:7]
1410 ; GFX9-G-NEXT: v_lshlrev_b64 v[12:13], 1, v[12:13]
1411 ; GFX9-G-NEXT: v_lshrrev_b32_e32 v4, 31, v7
1412 ; GFX9-G-NEXT: v_or_b32_e32 v12, v12, v4
1413 ; GFX9-G-NEXT: v_or_b32_e32 v6, v0, v2
1414 ; GFX9-G-NEXT: v_or_b32_e32 v7, v1, v3
1415 ; GFX9-G-NEXT: .LBB0_6: ; %Flow3
1416 ; GFX9-G-NEXT: s_or_b64 exec, exec, s[6:7]
1417 ; GFX9-G-NEXT: v_xor_b32_e32 v3, v17, v16
1418 ; GFX9-G-NEXT: v_xor_b32_e32 v0, v6, v3
1419 ; GFX9-G-NEXT: v_xor_b32_e32 v1, v7, v3
1420 ; GFX9-G-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v3
1421 ; GFX9-G-NEXT: v_xor_b32_e32 v2, v12, v3
1422 ; GFX9-G-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
1423 ; GFX9-G-NEXT: v_xor_b32_e32 v4, v13, v3
1424 ; GFX9-G-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v3, vcc
1425 ; GFX9-G-NEXT: v_subb_co_u32_e32 v3, vcc, v4, v3, vcc
1426 ; GFX9-G-NEXT: s_setpc_b64 s[30:31]
1428 ; GFX9-G-O0-LABEL: v_sdiv_i128_vv:
1429 ; GFX9-G-O0: ; %bb.0: ; %_udiv-special-cases
1430 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1431 ; GFX9-G-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
1432 ; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
1433 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5]
1434 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v1
1435 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v2
1436 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v3
1437 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
1438 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v10
1439 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v9
1440 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v8
1441 ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
1442 ; GFX9-G-O0-NEXT: s_nop 0
1443 ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
1444 ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
1445 ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
1446 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v4
1447 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v5
1448 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v6
1449 ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
1450 ; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
1451 ; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
1452 ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
1453 ; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13_vgpr14_vgpr15 killed $exec
1454 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v1
1455 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v0
1456 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v7
1457 ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0
1458 ; GFX9-G-O0-NEXT: s_mov_b64 s[8:9], 0x7f
1459 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr3_vgpr4 killed $exec
1460 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
1461 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v5
1462 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v6
1463 ; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec
1464 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v1
1465 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 31
1466 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s6
1467 ; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v11, v2, v7
1468 ; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec
1469 ; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec
1470 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 31
1471 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6
1472 ; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v9, v0, v1
1473 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr12_vgpr13 killed $exec
1474 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v14
1475 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v15
1476 ; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec
1477 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v1
1478 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 31
1479 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s6
1480 ; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v10, v2, v7
1481 ; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec
1482 ; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec
1483 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 31
1484 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6
1485 ; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v8, v0, v1
1486 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v3
1487 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v4
1488 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v5
1489 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v6
1490 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v1
1491 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v2
1492 ; GFX9-G-O0-NEXT: v_xor_b32_e64 v0, v11, v0
1493 ; GFX9-G-O0-NEXT: v_xor_b32_e64 v1, v11, v1
1494 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v4
1495 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v5
1496 ; GFX9-G-O0-NEXT: v_xor_b32_e64 v3, v9, v3
1497 ; GFX9-G-O0-NEXT: v_xor_b32_e64 v2, v9, v2
1498 ; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v0, s[6:7], v0, v11
1499 ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
1500 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v1, s[6:7], v1, v11, s[6:7]
1501 ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
1502 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v5, s[6:7], v3, v9, s[6:7]
1503 ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
1504 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v2, s[6:7], v2, v9, s[6:7]
1505 ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
1506 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v12
1507 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v13
1508 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v14
1509 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v15
1510 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v6
1511 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v7
1512 ; GFX9-G-O0-NEXT: v_xor_b32_e64 v7, v10, v4
1513 ; GFX9-G-O0-NEXT: v_xor_b32_e64 v4, v10, v3
1514 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v12
1515 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v13
1516 ; GFX9-G-O0-NEXT: v_xor_b32_e64 v6, v8, v6
1517 ; GFX9-G-O0-NEXT: v_xor_b32_e64 v3, v8, v3
1518 ; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v7, s[6:7], v7, v10
1519 ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
1520 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v4, s[6:7], v4, v10, s[6:7]
1521 ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
1522 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v6, s[6:7], v6, v8, s[6:7]
1523 ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
1524 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v3, s[6:7], v3, v8, s[6:7]
1525 ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
1526 ; GFX9-G-O0-NEXT: v_xor_b32_e64 v12, v10, v11
1527 ; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
1528 ; GFX9-G-O0-NEXT: v_xor_b32_e64 v10, v10, v11
1529 ; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
1530 ; GFX9-G-O0-NEXT: v_xor_b32_e64 v10, v8, v9
1531 ; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
1532 ; GFX9-G-O0-NEXT: v_xor_b32_e64 v8, v8, v9
1533 ; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
1534 ; GFX9-G-O0-NEXT: v_or_b32_e64 v8, v7, v6
1535 ; GFX9-G-O0-NEXT: v_or_b32_e64 v10, v4, v3
1536 ; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
1537 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10
1538 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s5
1539 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s4
1540 ; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[6:7], v[8:9], v[10:11]
1541 ; GFX9-G-O0-NEXT: v_or_b32_e64 v8, v0, v5
1542 ; GFX9-G-O0-NEXT: v_or_b32_e64 v10, v1, v2
1543 ; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
1544 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10
1545 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s5
1546 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s4
1547 ; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], v[8:9], v[10:11]
1548 ; GFX9-G-O0-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11]
1549 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v6
1550 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v3
1551 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s5
1552 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s4
1553 ; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], v[8:9], v[10:11]
1554 ; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v4, v4
1555 ; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v7, v7
1556 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, 32
1557 ; GFX9-G-O0-NEXT: v_add_u32_e64 v7, v7, v8
1558 ; GFX9-G-O0-NEXT: v_min_u32_e64 v4, v4, v7
1559 ; GFX9-G-O0-NEXT: s_mov_b32 s12, 64
1560 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s12
1561 ; GFX9-G-O0-NEXT: v_add_u32_e64 v4, v4, v7
1562 ; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v3, v3
1563 ; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v6, v6
1564 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 32
1565 ; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v6, v7
1566 ; GFX9-G-O0-NEXT: v_min_u32_e64 v3, v3, v6
1567 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[10:11]
1568 ; GFX9-G-O0-NEXT: s_mov_b32 s16, 0
1569 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v5
1570 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v2
1571 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s5
1572 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s4
1573 ; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], v[6:7], v[8:9]
1574 ; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v4, v1
1575 ; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v6, v0
1576 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 32
1577 ; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v6, v7
1578 ; GFX9-G-O0-NEXT: v_min_u32_e64 v4, v4, v6
1579 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s12
1580 ; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v4, v6
1581 ; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v4, v2
1582 ; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v7, v5
1583 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, 32
1584 ; GFX9-G-O0-NEXT: v_add_u32_e64 v7, v7, v8
1585 ; GFX9-G-O0-NEXT: v_min_u32_e64 v4, v4, v7
1586 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v6, s[10:11]
1587 ; GFX9-G-O0-NEXT: s_mov_b32 s15, 0
1588 ; GFX9-G-O0-NEXT: s_mov_b32 s13, 0
1589 ; GFX9-G-O0-NEXT: s_mov_b32 s14, 0
1590 ; GFX9-G-O0-NEXT: s_mov_b32 s12, 0
1591 ; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v6, s[10:11], v3, v4
1592 ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
1593 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s16
1594 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s16
1595 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v3, s[10:11], v3, v4, s[10:11]
1596 ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
1597 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s15
1598 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s14
1599 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v8, s[10:11], v4, v7, s[10:11]
1600 ; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
1601 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s13
1602 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s12
1603 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v7, s[10:11], v4, v7, s[10:11]
1604 ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
1605 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v6
1606 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v3
1607 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v8
1608 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v7
1609 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s9
1610 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s8
1611 ; GFX9-G-O0-NEXT: v_cmp_gt_u64_e64 s[12:13], v[11:12], v[13:14]
1612 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s5
1613 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s4
1614 ; GFX9-G-O0-NEXT: v_cmp_gt_u64_e64 s[10:11], v[9:10], v[11:12]
1615 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s5
1616 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s4
1617 ; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[9:10], v[11:12]
1618 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, 1
1619 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0
1620 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v4, v9, s[12:13]
1621 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, 1
1622 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0
1623 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[10:11]
1624 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v4, v9, s[8:9]
1625 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, 1
1626 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0
1627 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[6:7]
1628 ; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v9
1629 ; GFX9-G-O0-NEXT: s_mov_b32 s7, 0x7f
1630 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 0
1631 ; GFX9-G-O0-NEXT: v_xor_b32_e64 v6, v6, s7
1632 ; GFX9-G-O0-NEXT: v_xor_b32_e64 v3, v3, s6
1633 ; GFX9-G-O0-NEXT: v_or_b32_e64 v6, v6, v8
1634 ; GFX9-G-O0-NEXT: v_or_b32_e64 v3, v3, v7
1635 ; GFX9-G-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
1636 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v3
1637 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s5
1638 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s4
1639 ; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[6:7], v[8:9]
1640 ; GFX9-G-O0-NEXT: v_and_b32_e32 v3, 1, v4
1641 ; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v3
1642 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 0
1643 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0
1644 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[6:7]
1645 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v1, v3, s[6:7]
1646 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
1647 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v3
1648 ; GFX9-G-O0-NEXT: v_and_b32_e32 v3, 1, v4
1649 ; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v3
1650 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 0
1651 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0
1652 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[6:7]
1653 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[6:7]
1654 ; GFX9-G-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
1655 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v2
1656 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
1657 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v5
1658 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v6
1659 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 1
1660 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, 0
1661 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5]
1662 ; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v5
1663 ; GFX9-G-O0-NEXT: v_and_b32_e32 v4, 1, v4
1664 ; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v4
1665 ; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], -1
1666 ; GFX9-G-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7]
1667 ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
1668 ; GFX9-G-O0-NEXT: s_nop 0
1669 ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
1670 ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
1671 ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
1672 ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], exec
1673 ; GFX9-G-O0-NEXT: ; implicit-def: $vgpr34 : SGPR spill to VGPR lane
1674 ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s4, 0
1675 ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s5, 1
1676 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
1677 ; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
1678 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
1679 ; GFX9-G-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
1680 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5]
1681 ; GFX9-G-O0-NEXT: s_cbranch_execz .LBB0_3
1682 ; GFX9-G-O0-NEXT: s_branch .LBB0_8
1683 ; GFX9-G-O0-NEXT: .LBB0_1: ; %Flow
1684 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
1685 ; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
1686 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
1687 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
1688 ; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 2
1689 ; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 3
1690 ; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5]
1691 ; GFX9-G-O0-NEXT: ; %bb.2: ; %Flow
1692 ; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
1693 ; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
1694 ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
1695 ; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
1696 ; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
1697 ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
1698 ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
1699 ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
1700 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7)
1701 ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
1702 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7)
1703 ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
1704 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7)
1705 ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
1706 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7)
1707 ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
1708 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7)
1709 ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
1710 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7)
1711 ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
1712 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7)
1713 ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
1714 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7)
1715 ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
1716 ; GFX9-G-O0-NEXT: s_branch .LBB0_5
1717 ; GFX9-G-O0-NEXT: .LBB0_3: ; %Flow2
1718 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
1719 ; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
1720 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
1721 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
1722 ; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 0
1723 ; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 1
1724 ; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5]
1725 ; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
1726 ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
1727 ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
1728 ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
1729 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
1730 ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
1731 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
1732 ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
1733 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
1734 ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
1735 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
1736 ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
1737 ; GFX9-G-O0-NEXT: s_branch .LBB0_9
1738 ; GFX9-G-O0-NEXT: .LBB0_4: ; %udiv-loop-exit
1739 ; GFX9-G-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
1740 ; GFX9-G-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
1741 ; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
1742 ; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
1743 ; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
1744 ; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
1745 ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
1746 ; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
1747 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2)
1748 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v4
1749 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v5
1750 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
1751 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v6
1752 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v7
1753 ; GFX9-G-O0-NEXT: s_mov_b32 s4, 1
1754 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s4
1755 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[10:11], v0, v[2:3]
1756 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s4
1757 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[0:1], v0, v[4:5]
1758 ; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr2 killed $exec
1759 ; GFX9-G-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr2_vgpr3 killed $exec
1760 ; GFX9-G-O0-NEXT: s_mov_b32 s4, 31
1761 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s4
1762 ; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v6, v2, v3
1763 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0
1764 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v0
1765 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v1
1766 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v14
1767 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v15
1768 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v16
1769 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v17
1770 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v12
1771 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v13
1772 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v10
1773 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v11
1774 ; GFX9-G-O0-NEXT: v_or_b32_e64 v0, v0, v7
1775 ; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v1, v5
1776 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
1777 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v5
1778 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v8
1779 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v9
1780 ; GFX9-G-O0-NEXT: v_or3_b32 v4, v4, v6, v7
1781 ; GFX9-G-O0-NEXT: v_or3_b32 v2, v2, v3, v5
1782 ; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
1783 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v2
1784 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
1785 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v4
1786 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v5
1787 ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
1788 ; GFX9-G-O0-NEXT: s_nop 0
1789 ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
1790 ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
1791 ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
1792 ; GFX9-G-O0-NEXT: s_branch .LBB0_3
1793 ; GFX9-G-O0-NEXT: .LBB0_5: ; %Flow1
1794 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
1795 ; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
1796 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
1797 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
1798 ; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 4
1799 ; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 5
1800 ; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5]
1801 ; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
1802 ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
1803 ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
1804 ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
1805 ; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
1806 ; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
1807 ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
1808 ; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
1809 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
1810 ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
1811 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
1812 ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
1813 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
1814 ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
1815 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
1816 ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
1817 ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
1818 ; GFX9-G-O0-NEXT: s_nop 0
1819 ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
1820 ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
1821 ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
1822 ; GFX9-G-O0-NEXT: s_branch .LBB0_4
1823 ; GFX9-G-O0-NEXT: .LBB0_6: ; %udiv-do-while
1824 ; GFX9-G-O0-NEXT: ; =>This Inner Loop Header: Depth=1
1825 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
1826 ; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
1827 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
1828 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
1829 ; GFX9-G-O0-NEXT: v_readlane_b32 s6, v34, 6
1830 ; GFX9-G-O0-NEXT: v_readlane_b32 s7, v34, 7
1831 ; GFX9-G-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
1832 ; GFX9-G-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
1833 ; GFX9-G-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
1834 ; GFX9-G-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
1835 ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
1836 ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
1837 ; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
1838 ; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
1839 ; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
1840 ; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
1841 ; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
1842 ; GFX9-G-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
1843 ; GFX9-G-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
1844 ; GFX9-G-O0-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
1845 ; GFX9-G-O0-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
1846 ; GFX9-G-O0-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
1847 ; GFX9-G-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
1848 ; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
1849 ; GFX9-G-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
1850 ; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
1851 ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
1852 ; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
1853 ; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
1854 ; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
1855 ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0
1856 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(18)
1857 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v2
1858 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v3
1859 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(16)
1860 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v4
1861 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v5
1862 ; GFX9-G-O0-NEXT: s_mov_b32 s8, 1
1863 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8
1864 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[14:15], v2, v[0:1]
1865 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8
1866 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[4:5], v2, v[3:4]
1867 ; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec
1868 ; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec
1869 ; GFX9-G-O0-NEXT: s_mov_b32 s9, 31
1870 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s9
1871 ; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v3, v0, v1
1872 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, 0
1873 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v4
1874 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v5
1875 ; GFX9-G-O0-NEXT: v_or_b32_e64 v7, v2, v3
1876 ; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v0, v1
1877 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr22_vgpr23 killed $exec
1878 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v24
1879 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v25
1880 ; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec
1881 ; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec
1882 ; GFX9-G-O0-NEXT: s_mov_b32 s9, 31
1883 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s9
1884 ; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v3, v0, v1
1885 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, 0
1886 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v14
1887 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v15
1888 ; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v2, v3
1889 ; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v0, v1
1890 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v22
1891 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v23
1892 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v24
1893 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v25
1894 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8
1895 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[26:27], v0, v[2:3]
1896 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8
1897 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[0:1], v0, v[14:15]
1898 ; GFX9-G-O0-NEXT: ; kill: def $vgpr14 killed $vgpr2 killed $exec
1899 ; GFX9-G-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr2_vgpr3 killed $exec
1900 ; GFX9-G-O0-NEXT: s_mov_b32 s8, 31
1901 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8
1902 ; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v22, v2, v3
1903 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0
1904 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v0
1905 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v1
1906 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(10)
1907 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v28, v30
1908 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v29, v31
1909 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(8)
1910 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v24, v32
1911 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v25, v33
1912 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v28
1913 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v29
1914 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v26
1915 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v27
1916 ; GFX9-G-O0-NEXT: v_or_b32_e64 v0, v0, v23
1917 ; GFX9-G-O0-NEXT: v_or_b32_e64 v15, v1, v15
1918 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
1919 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v15
1920 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v24
1921 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v25
1922 ; GFX9-G-O0-NEXT: v_or3_b32 v14, v14, v22, v23
1923 ; GFX9-G-O0-NEXT: v_or3_b32 v2, v2, v3, v15
1924 ; GFX9-G-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
1925 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v2
1926 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
1927 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v14
1928 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v15
1929 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
1930 ; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v13, s[8:9], v13, v4
1931 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v12, s[8:9], v12, v9, s[8:9]
1932 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v10, s[8:9], v10, v7, s[8:9]
1933 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v12, s[8:9], v6, v5, s[8:9]
1934 ; GFX9-G-O0-NEXT: s_mov_b32 s8, 31
1935 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s8
1936 ; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v10, v6, v12
1937 ; GFX9-G-O0-NEXT: s_mov_b32 s8, 31
1938 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s8
1939 ; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v6, v6, v12
1940 ; GFX9-G-O0-NEXT: s_mov_b32 s9, 1
1941 ; GFX9-G-O0-NEXT: s_mov_b32 s8, 0
1942 ; GFX9-G-O0-NEXT: v_and_b32_e64 v12, v10, s9
1943 ; GFX9-G-O0-NEXT: v_and_b32_e64 v14, v10, s8
1944 ; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
1945 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v14
1946 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, s5
1947 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, s4
1948 ; GFX9-G-O0-NEXT: ; kill: def $vgpr12_vgpr13 killed $vgpr12_vgpr13 def $vgpr12_vgpr13_vgpr14_vgpr15 killed $exec
1949 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v22
1950 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v23
1951 ; GFX9-G-O0-NEXT: v_and_b32_e64 v11, v10, v11
1952 ; GFX9-G-O0-NEXT: v_and_b32_e64 v10, v10, v21
1953 ; GFX9-G-O0-NEXT: v_and_b32_e64 v8, v6, v8
1954 ; GFX9-G-O0-NEXT: v_and_b32_e64 v6, v6, v20
1955 ; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v4, s[8:9], v4, v11
1956 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v10, s[8:9], v9, v10, s[8:9]
1957 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v9, s[8:9], v7, v8, s[8:9]
1958 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v8, s[8:9], v5, v6, s[8:9]
1959 ; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec
1960 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v10
1961 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v9
1962 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v8
1963 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v16
1964 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v17
1965 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v18
1966 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v19
1967 ; GFX9-G-O0-NEXT: s_mov_b32 s8, -1
1968 ; GFX9-G-O0-NEXT: s_mov_b32 s12, -1
1969 ; GFX9-G-O0-NEXT: s_mov_b32 s11, -1
1970 ; GFX9-G-O0-NEXT: s_mov_b32 s10, -1
1971 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, s8
1972 ; GFX9-G-O0-NEXT: v_add_co_u32_e64 v16, s[8:9], v11, v16
1973 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s12
1974 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v17, s[8:9], v10, v11, s[8:9]
1975 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s11
1976 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v19, s[8:9], v9, v10, s[8:9]
1977 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s10
1978 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v18, s[8:9], v8, v9, s[8:9]
1979 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v16
1980 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v17
1981 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v19
1982 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v18
1983 ; GFX9-G-O0-NEXT: v_or_b32_e64 v16, v16, v19
1984 ; GFX9-G-O0-NEXT: v_or_b32_e64 v18, v17, v18
1985 ; GFX9-G-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
1986 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v18
1987 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, s5
1988 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, s4
1989 ; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[16:17], v[18:19]
1990 ; GFX9-G-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
1991 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v3
1992 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v2
1993 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v1
1994 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v0
1995 ; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
1996 ; GFX9-G-O0-NEXT: s_nop 0
1997 ; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
1998 ; GFX9-G-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
1999 ; GFX9-G-O0-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
2000 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v15
2001 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v14
2002 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v13
2003 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v12
2004 ; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
2005 ; GFX9-G-O0-NEXT: s_nop 0
2006 ; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
2007 ; GFX9-G-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
2008 ; GFX9-G-O0-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
2009 ; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[4:5]
2010 ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 2
2011 ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 3
2012 ; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[4:5]
2013 ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 6
2014 ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 7
2015 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
2016 ; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
2017 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
2018 ; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
2019 ; GFX9-G-O0-NEXT: s_nop 0
2020 ; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
2021 ; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
2022 ; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
2023 ; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
2024 ; GFX9-G-O0-NEXT: s_nop 0
2025 ; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
2026 ; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
2027 ; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
2028 ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
2029 ; GFX9-G-O0-NEXT: s_nop 0
2030 ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
2031 ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
2032 ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
2033 ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
2034 ; GFX9-G-O0-NEXT: s_nop 0
2035 ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
2036 ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
2037 ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
2038 ; GFX9-G-O0-NEXT: s_andn2_b64 exec, exec, s[4:5]
2039 ; GFX9-G-O0-NEXT: s_cbranch_execnz .LBB0_6
2040 ; GFX9-G-O0-NEXT: s_branch .LBB0_1
2041 ; GFX9-G-O0-NEXT: .LBB0_7: ; %udiv-preheader
2042 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
2043 ; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
2044 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
2045 ; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
2046 ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
2047 ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
2048 ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
2049 ; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
2050 ; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
2051 ; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
2052 ; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
2053 ; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
2054 ; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
2055 ; GFX9-G-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
2056 ; GFX9-G-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
2057 ; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
2058 ; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
2059 ; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
2060 ; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
2061 ; GFX9-G-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
2062 ; GFX9-G-O0-NEXT: s_mov_b32 s4, 64
2063 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
2064 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v17
2065 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v16
2066 ; GFX9-G-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
2067 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1)
2068 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v4
2069 ; GFX9-G-O0-NEXT: s_mov_b32 s5, 0xffffffc0
2070 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s5
2071 ; GFX9-G-O0-NEXT: v_add_u32_e64 v4, v18, v4
2072 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s4
2073 ; GFX9-G-O0-NEXT: v_sub_u32_e64 v5, v5, v18
2074 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 0
2075 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s4
2076 ; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v18, v6
2077 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s6
2078 ; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v18, v6
2079 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
2080 ; GFX9-G-O0-NEXT: v_lshrrev_b64 v[6:7], v18, v[20:21]
2081 ; GFX9-G-O0-NEXT: v_lshrrev_b64 v[25:26], v18, v[22:23]
2082 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[23:24], v5, v[20:21]
2083 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v25
2084 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v26
2085 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v23
2086 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v24
2087 ; GFX9-G-O0-NEXT: v_or_b32_e64 v19, v19, v22
2088 ; GFX9-G-O0-NEXT: v_or_b32_e64 v18, v5, v18
2089 ; GFX9-G-O0-NEXT: s_mov_b64 s[8:9], 0
2090 ; GFX9-G-O0-NEXT: v_lshrrev_b64 v[20:21], v4, v[20:21]
2091 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v20
2092 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v21
2093 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v19, s[4:5]
2094 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v18, s[4:5]
2095 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v17, s[6:7]
2096 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v16, v5, v16, s[6:7]
2097 ; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
2098 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v16
2099 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v6
2100 ; GFX9-G-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec
2101 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, 0
2102 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 0
2103 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[4:5]
2104 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5]
2105 ; GFX9-G-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
2106 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v6
2107 ; GFX9-G-O0-NEXT: ; kill: def $vgpr4_vgpr5 killed $vgpr4_vgpr5 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec
2108 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v16
2109 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v17
2110 ; GFX9-G-O0-NEXT: s_mov_b32 s4, -1
2111 ; GFX9-G-O0-NEXT: s_mov_b32 s10, -1
2112 ; GFX9-G-O0-NEXT: s_mov_b32 s7, -1
2113 ; GFX9-G-O0-NEXT: s_mov_b32 s6, -1
2114 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, s4
2115 ; GFX9-G-O0-NEXT: v_add_co_u32_e64 v15, s[4:5], v15, v16
2116 ; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
2117 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s10
2118 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v14, s[4:5], v14, v15, s[4:5]
2119 ; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
2120 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s7
2121 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v13, s[4:5], v13, v14, s[4:5]
2122 ; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
2123 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s6
2124 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v12, s[4:5], v12, v13, s[4:5]
2125 ; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
2126 ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], s[8:9]
2127 ; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[8:9]
2128 ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s8, 6
2129 ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s9, 7
2130 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
2131 ; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
2132 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
2133 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s7
2134 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s6
2135 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s5
2136 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s4
2137 ; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
2138 ; GFX9-G-O0-NEXT: s_nop 0
2139 ; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
2140 ; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
2141 ; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
2142 ; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
2143 ; GFX9-G-O0-NEXT: s_nop 0
2144 ; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
2145 ; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
2146 ; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
2147 ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
2148 ; GFX9-G-O0-NEXT: s_nop 0
2149 ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
2150 ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
2151 ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
2152 ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
2153 ; GFX9-G-O0-NEXT: s_nop 0
2154 ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
2155 ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
2156 ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
2157 ; GFX9-G-O0-NEXT: s_branch .LBB0_6
2158 ; GFX9-G-O0-NEXT: .LBB0_8: ; %udiv-bb1
2159 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
2160 ; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
2161 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
2162 ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
2163 ; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
2164 ; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
2165 ; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
2166 ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
2167 ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
2168 ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
2169 ; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
2170 ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0
2171 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 1
2172 ; GFX9-G-O0-NEXT: s_mov_b32 s10, 0
2173 ; GFX9-G-O0-NEXT: s_mov_b32 s9, 0
2174 ; GFX9-G-O0-NEXT: s_mov_b32 s8, 0
2175 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s6
2176 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
2177 ; GFX9-G-O0-NEXT: v_add_co_u32_e64 v4, s[6:7], v2, v4
2178 ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
2179 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s10
2180 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1)
2181 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v5, s[6:7], v5, v7, s[6:7]
2182 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s9
2183 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v7, s[6:7], v6, v7, s[6:7]
2184 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s8
2185 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v6, s[6:7], v1, v6, s[6:7]
2186 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v4
2187 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v5
2188 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v7
2189 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v6
2190 ; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
2191 ; GFX9-G-O0-NEXT: s_nop 0
2192 ; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
2193 ; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
2194 ; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
2195 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 0x7f
2196 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s6
2197 ; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v8, s[6:7], v1, v2
2198 ; GFX9-G-O0-NEXT: s_mov_b32 s7, 64
2199 ; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
2200 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v0
2201 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v9
2202 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v3
2203 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 0xffffffc0
2204 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6
2205 ; GFX9-G-O0-NEXT: v_add_u32_e64 v2, v8, v0
2206 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7
2207 ; GFX9-G-O0-NEXT: v_sub_u32_e64 v14, v0, v8
2208 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 0
2209 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7
2210 ; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[8:9], v8, v0
2211 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6
2212 ; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v8, v0
2213 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[0:1], v8, v[12:13]
2214 ; GFX9-G-O0-NEXT: v_lshrrev_b64 v[17:18], v14, v[12:13]
2215 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[15:16], v8, v[10:11]
2216 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v17
2217 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v18
2218 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v15
2219 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v16
2220 ; GFX9-G-O0-NEXT: v_or_b32_e64 v11, v11, v14
2221 ; GFX9-G-O0-NEXT: v_or_b32_e64 v10, v8, v10
2222 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[12:13], v2, v[12:13]
2223 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v0
2224 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v1
2225 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, 0
2226 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, 0
2227 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[8:9]
2228 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[8:9]
2229 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
2230 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v2
2231 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v12
2232 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v13
2233 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v8, v8, v11, s[8:9]
2234 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[8:9]
2235 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[6:7]
2236 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[6:7]
2237 ; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
2238 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v2
2239 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
2240 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v8
2241 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v9
2242 ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
2243 ; GFX9-G-O0-NEXT: s_nop 0
2244 ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
2245 ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
2246 ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
2247 ; GFX9-G-O0-NEXT: s_mov_b64 s[8:9], s[4:5]
2248 ; GFX9-G-O0-NEXT: s_mov_b64 s[10:11], s[4:5]
2249 ; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v7
2250 ; GFX9-G-O0-NEXT: v_or_b32_e64 v6, v5, v6
2251 ; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
2252 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v6
2253 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s5
2254 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s4
2255 ; GFX9-G-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[4:5], v[6:7]
2256 ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
2257 ; GFX9-G-O0-NEXT: s_nop 0
2258 ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
2259 ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
2260 ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
2261 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8
2262 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s9
2263 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s10
2264 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s11
2265 ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
2266 ; GFX9-G-O0-NEXT: s_nop 0
2267 ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
2268 ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
2269 ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
2270 ; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], exec
2271 ; GFX9-G-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5]
2272 ; GFX9-G-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7]
2273 ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 4
2274 ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 5
2275 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
2276 ; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
2277 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
2278 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5]
2279 ; GFX9-G-O0-NEXT: s_cbranch_execz .LBB0_5
2280 ; GFX9-G-O0-NEXT: s_branch .LBB0_7
2281 ; GFX9-G-O0-NEXT: .LBB0_9: ; %udiv-end
2282 ; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
2283 ; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
2284 ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
2285 ; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
2286 ; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
2287 ; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
2288 ; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
2289 ; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
2290 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2)
2291 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v8
2292 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v9
2293 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
2294 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v10
2295 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v11
2296 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v1
2297 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v2
2298 ; GFX9-G-O0-NEXT: v_xor_b32_e64 v0, v0, v7
2299 ; GFX9-G-O0-NEXT: v_xor_b32_e64 v1, v1, v6
2300 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v8
2301 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v9
2302 ; GFX9-G-O0-NEXT: v_xor_b32_e64 v2, v2, v5
2303 ; GFX9-G-O0-NEXT: v_xor_b32_e64 v3, v3, v4
2304 ; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v0, s[4:5], v0, v7
2305 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v1, s[4:5], v1, v6, s[4:5]
2306 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v2, s[4:5], v2, v5, s[4:5]
2307 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v3, s[4:5], v3, v4, s[4:5]
2308 ; GFX9-G-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
2309 ; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
2310 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5]
2311 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
2312 ; GFX9-G-O0-NEXT: s_setpc_b64 s[30:31]
3380 ; GFX9-G-LABEL: v_udiv_i128_vv:
3381 ; GFX9-G: ; %bb.0: ; %_udiv-special-cases
3382 ; GFX9-G-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3383 ; GFX9-G-NEXT: v_or_b32_e32 v8, v4, v6
3384 ; GFX9-G-NEXT: v_or_b32_e32 v9, v5, v7
3385 ; GFX9-G-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9]
3386 ; GFX9-G-NEXT: v_or_b32_e32 v8, v0, v2
3387 ; GFX9-G-NEXT: v_or_b32_e32 v9, v1, v3
3388 ; GFX9-G-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[8:9]
3389 ; GFX9-G-NEXT: v_ffbh_u32_e32 v9, v4
3390 ; GFX9-G-NEXT: v_ffbh_u32_e32 v8, v5
3391 ; GFX9-G-NEXT: v_add_u32_e32 v9, 32, v9
3392 ; GFX9-G-NEXT: v_ffbh_u32_e32 v10, v6
3393 ; GFX9-G-NEXT: v_min_u32_e32 v8, v8, v9
3394 ; GFX9-G-NEXT: v_ffbh_u32_e32 v9, v7
3395 ; GFX9-G-NEXT: v_add_u32_e32 v10, 32, v10
3396 ; GFX9-G-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[6:7]
3397 ; GFX9-G-NEXT: v_add_u32_e32 v8, 64, v8
3398 ; GFX9-G-NEXT: v_min_u32_e32 v9, v9, v10
3399 ; GFX9-G-NEXT: v_ffbh_u32_e32 v10, v0
3400 ; GFX9-G-NEXT: v_cndmask_b32_e64 v8, v9, v8, s[6:7]
3401 ; GFX9-G-NEXT: v_ffbh_u32_e32 v9, v1
3402 ; GFX9-G-NEXT: v_add_u32_e32 v10, 32, v10
3403 ; GFX9-G-NEXT: v_ffbh_u32_e32 v11, v2
3404 ; GFX9-G-NEXT: v_min_u32_e32 v9, v9, v10
3405 ; GFX9-G-NEXT: v_ffbh_u32_e32 v10, v3
3406 ; GFX9-G-NEXT: v_add_u32_e32 v11, 32, v11
3407 ; GFX9-G-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[2:3]
3408 ; GFX9-G-NEXT: v_add_u32_e32 v9, 64, v9
3409 ; GFX9-G-NEXT: v_min_u32_e32 v10, v10, v11
3410 ; GFX9-G-NEXT: v_cndmask_b32_e64 v9, v10, v9, s[6:7]
3411 ; GFX9-G-NEXT: v_sub_co_u32_e64 v12, s[6:7], v8, v9
3412 ; GFX9-G-NEXT: v_subb_co_u32_e64 v13, s[6:7], 0, 0, s[6:7]
3413 ; GFX9-G-NEXT: v_mov_b32_e32 v8, 0x7f
3414 ; GFX9-G-NEXT: v_subb_co_u32_e64 v14, s[6:7], 0, 0, s[6:7]
3415 ; GFX9-G-NEXT: v_mov_b32_e32 v9, 0
3416 ; GFX9-G-NEXT: v_subb_co_u32_e64 v15, s[6:7], 0, 0, s[6:7]
3417 ; GFX9-G-NEXT: v_cmp_gt_u64_e64 s[6:7], v[12:13], v[8:9]
3418 ; GFX9-G-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
3419 ; GFX9-G-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[6:7]
3420 ; GFX9-G-NEXT: v_cmp_lt_u64_e64 s[6:7], 0, v[14:15]
3421 ; GFX9-G-NEXT: v_or_b32_e32 v17, v13, v15
3422 ; GFX9-G-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[6:7]
3423 ; GFX9-G-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[14:15]
3424 ; GFX9-G-NEXT: s_mov_b64 s[8:9], 0
3425 ; GFX9-G-NEXT: v_cndmask_b32_e64 v8, v9, v8, s[6:7]
3426 ; GFX9-G-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5]
3427 ; GFX9-G-NEXT: v_or_b32_e32 v18, v9, v8
3428 ; GFX9-G-NEXT: v_xor_b32_e32 v8, 0x7f, v12
3429 ; GFX9-G-NEXT: v_or_b32_e32 v16, v8, v14
3430 ; GFX9-G-NEXT: v_and_b32_e32 v8, 1, v18
3431 ; GFX9-G-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
3432 ; GFX9-G-NEXT: v_cndmask_b32_e64 v10, v0, 0, vcc
3433 ; GFX9-G-NEXT: v_cndmask_b32_e64 v11, v1, 0, vcc
3434 ; GFX9-G-NEXT: v_cndmask_b32_e64 v8, v2, 0, vcc
3435 ; GFX9-G-NEXT: v_cndmask_b32_e64 v9, v3, 0, vcc
3436 ; GFX9-G-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17]
3437 ; GFX9-G-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
3438 ; GFX9-G-NEXT: v_or_b32_e32 v16, v18, v16
3439 ; GFX9-G-NEXT: v_and_b32_e32 v16, 1, v16
3440 ; GFX9-G-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16
3441 ; GFX9-G-NEXT: s_xor_b64 s[4:5], vcc, -1
3442 ; GFX9-G-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
3443 ; GFX9-G-NEXT: s_cbranch_execz .LBB1_6
3444 ; GFX9-G-NEXT: ; %bb.1: ; %udiv-bb1
3445 ; GFX9-G-NEXT: v_add_co_u32_e32 v18, vcc, 1, v12
3446 ; GFX9-G-NEXT: v_addc_co_u32_e32 v19, vcc, 0, v13, vcc
3447 ; GFX9-G-NEXT: v_addc_co_u32_e32 v20, vcc, 0, v14, vcc
3448 ; GFX9-G-NEXT: v_addc_co_u32_e32 v21, vcc, 0, v15, vcc
3449 ; GFX9-G-NEXT: s_xor_b64 s[4:5], vcc, -1
3450 ; GFX9-G-NEXT: v_sub_co_u32_e32 v16, vcc, 0x7f, v12
3451 ; GFX9-G-NEXT: v_sub_u32_e32 v8, 64, v16
3452 ; GFX9-G-NEXT: v_lshrrev_b64 v[8:9], v8, v[0:1]
3453 ; GFX9-G-NEXT: v_lshlrev_b64 v[10:11], v16, v[2:3]
3454 ; GFX9-G-NEXT: v_add_u32_e32 v14, 0xffffffc0, v16
3455 ; GFX9-G-NEXT: v_lshlrev_b64 v[12:13], v16, v[0:1]
3456 ; GFX9-G-NEXT: v_or_b32_e32 v10, v8, v10
3457 ; GFX9-G-NEXT: v_or_b32_e32 v11, v9, v11
3458 ; GFX9-G-NEXT: v_lshlrev_b64 v[8:9], v14, v[0:1]
3459 ; GFX9-G-NEXT: v_cmp_gt_u32_e32 vcc, 64, v16
3460 ; GFX9-G-NEXT: s_mov_b64 s[10:11], s[8:9]
3461 ; GFX9-G-NEXT: v_cndmask_b32_e32 v14, 0, v12, vcc
3462 ; GFX9-G-NEXT: v_cndmask_b32_e32 v15, 0, v13, vcc
3463 ; GFX9-G-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc
3464 ; GFX9-G-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc
3465 ; GFX9-G-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16
3466 ; GFX9-G-NEXT: v_mov_b32_e32 v13, s11
3467 ; GFX9-G-NEXT: v_cndmask_b32_e32 v8, v8, v2, vcc
3468 ; GFX9-G-NEXT: v_cndmask_b32_e32 v9, v9, v3, vcc
3469 ; GFX9-G-NEXT: v_mov_b32_e32 v11, s9
3470 ; GFX9-G-NEXT: v_mov_b32_e32 v10, s8
3471 ; GFX9-G-NEXT: v_mov_b32_e32 v12, s10
3472 ; GFX9-G-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
3473 ; GFX9-G-NEXT: s_xor_b64 s[12:13], exec, s[8:9]
3474 ; GFX9-G-NEXT: s_cbranch_execz .LBB1_5
3475 ; GFX9-G-NEXT: ; %bb.2: ; %udiv-preheader
3476 ; GFX9-G-NEXT: v_sub_u32_e32 v12, 64, v18
3477 ; GFX9-G-NEXT: v_add_u32_e32 v22, 0xffffffc0, v18
3478 ; GFX9-G-NEXT: v_lshrrev_b64 v[10:11], v18, v[0:1]
3479 ; GFX9-G-NEXT: v_lshlrev_b64 v[12:13], v12, v[2:3]
3480 ; GFX9-G-NEXT: v_lshrrev_b64 v[16:17], v18, v[2:3]
3481 ; GFX9-G-NEXT: v_lshrrev_b64 v[2:3], v22, v[2:3]
3482 ; GFX9-G-NEXT: v_or_b32_e32 v10, v10, v12
3483 ; GFX9-G-NEXT: v_or_b32_e32 v11, v11, v13
3484 ; GFX9-G-NEXT: v_cmp_gt_u32_e32 vcc, 64, v18
3485 ; GFX9-G-NEXT: s_mov_b64 s[8:9], 0
3486 ; GFX9-G-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
3487 ; GFX9-G-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
3488 ; GFX9-G-NEXT: v_cndmask_b32_e32 v16, 0, v16, vcc
3489 ; GFX9-G-NEXT: v_cndmask_b32_e32 v17, 0, v17, vcc
3490 ; GFX9-G-NEXT: v_add_co_u32_e32 v22, vcc, -1, v4
3491 ; GFX9-G-NEXT: v_addc_co_u32_e32 v23, vcc, -1, v5, vcc
3492 ; GFX9-G-NEXT: s_mov_b64 s[10:11], s[8:9]
3493 ; GFX9-G-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v18
3494 ; GFX9-G-NEXT: v_addc_co_u32_e32 v24, vcc, -1, v6, vcc
3495 ; GFX9-G-NEXT: v_mov_b32_e32 v13, s11
3496 ; GFX9-G-NEXT: v_cndmask_b32_e64 v2, v2, v0, s[4:5]
3497 ; GFX9-G-NEXT: v_cndmask_b32_e64 v3, v3, v1, s[4:5]
3498 ; GFX9-G-NEXT: v_addc_co_u32_e32 v25, vcc, -1, v7, vcc
3499 ; GFX9-G-NEXT: v_mov_b32_e32 v1, 0
3500 ; GFX9-G-NEXT: v_mov_b32_e32 v11, s9
3501 ; GFX9-G-NEXT: v_mov_b32_e32 v10, s8
3502 ; GFX9-G-NEXT: v_mov_b32_e32 v12, s10
3503 ; GFX9-G-NEXT: .LBB1_3: ; %udiv-do-while
3504 ; GFX9-G-NEXT: ; =>This Inner Loop Header: Depth=1
3505 ; GFX9-G-NEXT: v_lshlrev_b64 v[12:13], 1, v[14:15]
3506 ; GFX9-G-NEXT: v_lshrrev_b32_e32 v0, 31, v15
3507 ; GFX9-G-NEXT: v_or_b32_e32 v14, v10, v12
3508 ; GFX9-G-NEXT: v_or_b32_e32 v15, v11, v13
3509 ; GFX9-G-NEXT: v_lshlrev_b64 v[10:11], 1, v[16:17]
3510 ; GFX9-G-NEXT: v_lshrrev_b32_e32 v12, 31, v3
3511 ; GFX9-G-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
3512 ; GFX9-G-NEXT: v_or_b32_e32 v10, v10, v12
3513 ; GFX9-G-NEXT: v_lshrrev_b32_e32 v12, 31, v9
3514 ; GFX9-G-NEXT: v_or_b32_e32 v2, v2, v12
3515 ; GFX9-G-NEXT: v_sub_co_u32_e32 v12, vcc, v22, v2
3516 ; GFX9-G-NEXT: v_subb_co_u32_e32 v12, vcc, v23, v3, vcc
3517 ; GFX9-G-NEXT: v_subb_co_u32_e32 v12, vcc, v24, v10, vcc
3518 ; GFX9-G-NEXT: v_subb_co_u32_e32 v12, vcc, v25, v11, vcc
3519 ; GFX9-G-NEXT: v_ashrrev_i32_e32 v12, 31, v12
3520 ; GFX9-G-NEXT: v_and_b32_e32 v13, v12, v4
3521 ; GFX9-G-NEXT: v_and_b32_e32 v16, v12, v5
3522 ; GFX9-G-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v13
3523 ; GFX9-G-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v16, vcc
3524 ; GFX9-G-NEXT: v_and_b32_e32 v13, v12, v6
3525 ; GFX9-G-NEXT: v_and_b32_e32 v17, v12, v7
3526 ; GFX9-G-NEXT: v_subb_co_u32_e32 v16, vcc, v10, v13, vcc
3527 ; GFX9-G-NEXT: v_subb_co_u32_e32 v17, vcc, v11, v17, vcc
3528 ; GFX9-G-NEXT: v_add_co_u32_e32 v18, vcc, -1, v18
3529 ; GFX9-G-NEXT: v_addc_co_u32_e32 v19, vcc, -1, v19, vcc
3530 ; GFX9-G-NEXT: v_addc_co_u32_e32 v20, vcc, -1, v20, vcc
3531 ; GFX9-G-NEXT: v_addc_co_u32_e32 v21, vcc, -1, v21, vcc
3532 ; GFX9-G-NEXT: v_or_b32_e32 v10, v18, v20
3533 ; GFX9-G-NEXT: v_or_b32_e32 v11, v19, v21
3534 ; GFX9-G-NEXT: v_lshlrev_b64 v[8:9], 1, v[8:9]
3535 ; GFX9-G-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11]
3536 ; GFX9-G-NEXT: v_or_b32_e32 v8, v8, v0
3537 ; GFX9-G-NEXT: v_and_b32_e32 v0, 1, v12
3538 ; GFX9-G-NEXT: v_mov_b32_e32 v11, v1
3539 ; GFX9-G-NEXT: s_or_b64 s[8:9], vcc, s[8:9]
3540 ; GFX9-G-NEXT: v_mov_b32_e32 v10, v0
3541 ; GFX9-G-NEXT: s_andn2_b64 exec, exec, s[8:9]
3542 ; GFX9-G-NEXT: s_cbranch_execnz .LBB1_3
3543 ; GFX9-G-NEXT: ; %bb.4: ; %Flow
3544 ; GFX9-G-NEXT: s_or_b64 exec, exec, s[8:9]
3545 ; GFX9-G-NEXT: .LBB1_5: ; %Flow2
3546 ; GFX9-G-NEXT: s_or_b64 exec, exec, s[12:13]
3547 ; GFX9-G-NEXT: v_lshlrev_b64 v[0:1], 1, v[14:15]
3548 ; GFX9-G-NEXT: v_lshlrev_b64 v[8:9], 1, v[8:9]
3549 ; GFX9-G-NEXT: v_lshrrev_b32_e32 v2, 31, v15
3550 ; GFX9-G-NEXT: v_or_b32_e32 v8, v8, v2
3551 ; GFX9-G-NEXT: v_or_b32_e32 v10, v10, v0
3552 ; GFX9-G-NEXT: v_or_b32_e32 v11, v11, v1
3553 ; GFX9-G-NEXT: .LBB1_6: ; %Flow3
3554 ; GFX9-G-NEXT: s_or_b64 exec, exec, s[6:7]
3555 ; GFX9-G-NEXT: v_mov_b32_e32 v0, v10
3556 ; GFX9-G-NEXT: v_mov_b32_e32 v1, v11
3557 ; GFX9-G-NEXT: v_mov_b32_e32 v2, v8
3558 ; GFX9-G-NEXT: v_mov_b32_e32 v3, v9
3559 ; GFX9-G-NEXT: s_setpc_b64 s[30:31]
3561 ; GFX9-G-O0-LABEL: v_udiv_i128_vv:
3562 ; GFX9-G-O0: ; %bb.0: ; %_udiv-special-cases
3563 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3564 ; GFX9-G-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
3565 ; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
3566 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5]
3567 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v1
3568 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v2
3569 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v3
3570 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
3571 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v10
3572 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v9
3573 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v8
3574 ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
3575 ; GFX9-G-O0-NEXT: s_nop 0
3576 ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
3577 ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
3578 ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
3579 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v5
3580 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v6
3581 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v7
3582 ; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec
3583 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v10
3584 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v9
3585 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v8
3586 ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
3587 ; GFX9-G-O0-NEXT: s_nop 0
3588 ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
3589 ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
3590 ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
3591 ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0
3592 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v5
3593 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v4
3594 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v7
3595 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v6
3596 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v9
3597 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10
3598 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v12
3599 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v13
3600 ; GFX9-G-O0-NEXT: v_or_b32_e64 v8, v8, v11
3601 ; GFX9-G-O0-NEXT: v_or_b32_e64 v10, v9, v10
3602 ; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
3603 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10
3604 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s5
3605 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s4
3606 ; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[6:7], v[8:9], v[10:11]
3607 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v1
3608 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v0
3609 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v3
3610 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v2
3611 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v9
3612 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10
3613 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v12
3614 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v13
3615 ; GFX9-G-O0-NEXT: v_or_b32_e64 v8, v8, v11
3616 ; GFX9-G-O0-NEXT: v_or_b32_e64 v10, v9, v10
3617 ; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
3618 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10
3619 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s5
3620 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s4
3621 ; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], v[10:11]
3622 ; GFX9-G-O0-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
3623 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v5
3624 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v4
3625 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v7
3626 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v6
3627 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s4
3628 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s5
3629 ; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], v[4:5]
3630 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v9
3631 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v10
3632 ; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v4, v4
3633 ; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v5, v5
3634 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 32
3635 ; GFX9-G-O0-NEXT: v_add_u32_e64 v5, v5, v6
3636 ; GFX9-G-O0-NEXT: v_min_u32_e64 v4, v4, v5
3637 ; GFX9-G-O0-NEXT: s_mov_b32 s10, 64
3638 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s10
3639 ; GFX9-G-O0-NEXT: v_add_u32_e64 v5, v4, v5
3640 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v7
3641 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v8
3642 ; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v4, v4
3643 ; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v6, v6
3644 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 32
3645 ; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v6, v7
3646 ; GFX9-G-O0-NEXT: v_min_u32_e64 v4, v4, v6
3647 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[8:9]
3648 ; GFX9-G-O0-NEXT: s_mov_b32 s14, 0
3649 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v1
3650 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v0
3651 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v3
3652 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v2
3653 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s5
3654 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s4
3655 ; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], v[5:6]
3656 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v10
3657 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v11
3658 ; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v5, v5
3659 ; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v6, v6
3660 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 32
3661 ; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v6, v7
3662 ; GFX9-G-O0-NEXT: v_min_u32_e64 v5, v5, v6
3663 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s10
3664 ; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v5, v6
3665 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v8
3666 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v9
3667 ; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v5, v5
3668 ; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v7, v7
3669 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, 32
3670 ; GFX9-G-O0-NEXT: v_add_u32_e64 v7, v7, v8
3671 ; GFX9-G-O0-NEXT: v_min_u32_e64 v5, v5, v7
3672 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[8:9]
3673 ; GFX9-G-O0-NEXT: s_mov_b32 s13, 0
3674 ; GFX9-G-O0-NEXT: s_mov_b32 s11, 0
3675 ; GFX9-G-O0-NEXT: s_mov_b32 s12, 0
3676 ; GFX9-G-O0-NEXT: s_mov_b32 s10, 0
3677 ; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v5, s[8:9], v4, v5
3678 ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
3679 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s14
3680 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s14
3681 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v6, s[8:9], v4, v6, s[8:9]
3682 ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
3683 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s13
3684 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s12
3685 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v8, s[8:9], v4, v7, s[8:9]
3686 ; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
3687 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s11
3688 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s10
3689 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v7, s[8:9], v4, v7, s[8:9]
3690 ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
3691 ; GFX9-G-O0-NEXT: s_mov_b64 s[8:9], 0x7f
3692 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v5
3693 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v6
3694 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v8
3695 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v7
3696 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s9
3697 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s8
3698 ; GFX9-G-O0-NEXT: v_cmp_gt_u64_e64 s[12:13], v[11:12], v[13:14]
3699 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s5
3700 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s4
3701 ; GFX9-G-O0-NEXT: v_cmp_gt_u64_e64 s[10:11], v[9:10], v[11:12]
3702 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s5
3703 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s4
3704 ; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[9:10], v[11:12]
3705 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, 1
3706 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0
3707 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v4, v9, s[12:13]
3708 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, 1
3709 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0
3710 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[10:11]
3711 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v4, v9, s[8:9]
3712 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, 1
3713 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0
3714 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[6:7]
3715 ; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v9
3716 ; GFX9-G-O0-NEXT: s_mov_b32 s7, 0x7f
3717 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 0
3718 ; GFX9-G-O0-NEXT: v_xor_b32_e64 v5, v5, s7
3719 ; GFX9-G-O0-NEXT: v_xor_b32_e64 v6, v6, s6
3720 ; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v5, v8
3721 ; GFX9-G-O0-NEXT: v_or_b32_e64 v7, v6, v7
3722 ; GFX9-G-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
3723 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v7
3724 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s5
3725 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s4
3726 ; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[5:6], v[7:8]
3727 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v1
3728 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v0
3729 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v3
3730 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v2
3731 ; GFX9-G-O0-NEXT: v_and_b32_e32 v0, 1, v4
3732 ; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v0
3733 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0
3734 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, 0
3735 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v5
3736 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v6
3737 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[6:7]
3738 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[6:7]
3739 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
3740 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v2
3741 ; GFX9-G-O0-NEXT: v_and_b32_e32 v2, 1, v4
3742 ; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v2
3743 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 0
3744 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0
3745 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v7
3746 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v8
3747 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[6:7]
3748 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[6:7]
3749 ; GFX9-G-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
3750 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v2
3751 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
3752 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v5
3753 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v6
3754 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 1
3755 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, 0
3756 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5]
3757 ; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v5
3758 ; GFX9-G-O0-NEXT: v_and_b32_e32 v4, 1, v4
3759 ; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v4
3760 ; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], -1
3761 ; GFX9-G-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7]
3762 ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
3763 ; GFX9-G-O0-NEXT: s_nop 0
3764 ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
3765 ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
3766 ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
3767 ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], exec
3768 ; GFX9-G-O0-NEXT: ; implicit-def: $vgpr34 : SGPR spill to VGPR lane
3769 ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s4, 0
3770 ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s5, 1
3771 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
3772 ; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
3773 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
3774 ; GFX9-G-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
3775 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5]
3776 ; GFX9-G-O0-NEXT: s_cbranch_execz .LBB1_3
3777 ; GFX9-G-O0-NEXT: s_branch .LBB1_8
3778 ; GFX9-G-O0-NEXT: .LBB1_1: ; %Flow
3779 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
3780 ; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
3781 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
3782 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
3783 ; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 2
3784 ; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 3
3785 ; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5]
3786 ; GFX9-G-O0-NEXT: ; %bb.2: ; %Flow
3787 ; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
3788 ; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
3789 ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
3790 ; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
3791 ; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
3792 ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
3793 ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
3794 ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
3795 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7)
3796 ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
3797 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7)
3798 ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
3799 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7)
3800 ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
3801 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7)
3802 ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
3803 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7)
3804 ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
3805 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7)
3806 ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
3807 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7)
3808 ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
3809 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7)
3810 ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
3811 ; GFX9-G-O0-NEXT: s_branch .LBB1_5
3812 ; GFX9-G-O0-NEXT: .LBB1_3: ; %Flow2
3813 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
3814 ; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
3815 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
3816 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
3817 ; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 0
3818 ; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 1
3819 ; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5]
3820 ; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
3821 ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
3822 ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
3823 ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
3824 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
3825 ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
3826 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
3827 ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
3828 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
3829 ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
3830 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
3831 ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
3832 ; GFX9-G-O0-NEXT: s_branch .LBB1_9
3833 ; GFX9-G-O0-NEXT: .LBB1_4: ; %udiv-loop-exit
3834 ; GFX9-G-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
3835 ; GFX9-G-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
3836 ; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
3837 ; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
3838 ; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
3839 ; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
3840 ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
3841 ; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
3842 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2)
3843 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v4
3844 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v5
3845 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
3846 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v6
3847 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v7
3848 ; GFX9-G-O0-NEXT: s_mov_b32 s4, 1
3849 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s4
3850 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[10:11], v0, v[2:3]
3851 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s4
3852 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[0:1], v0, v[4:5]
3853 ; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr2 killed $exec
3854 ; GFX9-G-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr2_vgpr3 killed $exec
3855 ; GFX9-G-O0-NEXT: s_mov_b32 s4, 31
3856 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s4
3857 ; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v6, v2, v3
3858 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0
3859 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v0
3860 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v1
3861 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v14
3862 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v15
3863 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v16
3864 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v17
3865 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v12
3866 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v13
3867 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v10
3868 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v11
3869 ; GFX9-G-O0-NEXT: v_or_b32_e64 v0, v0, v7
3870 ; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v1, v5
3871 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
3872 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v5
3873 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v8
3874 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v9
3875 ; GFX9-G-O0-NEXT: v_or3_b32 v4, v4, v6, v7
3876 ; GFX9-G-O0-NEXT: v_or3_b32 v2, v2, v3, v5
3877 ; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
3878 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v2
3879 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
3880 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v4
3881 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v5
3882 ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
3883 ; GFX9-G-O0-NEXT: s_nop 0
3884 ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
3885 ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
3886 ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
3887 ; GFX9-G-O0-NEXT: s_branch .LBB1_3
3888 ; GFX9-G-O0-NEXT: .LBB1_5: ; %Flow1
3889 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
3890 ; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
3891 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
3892 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
3893 ; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 4
3894 ; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 5
3895 ; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5]
3896 ; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
3897 ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
3898 ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
3899 ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
3900 ; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
3901 ; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
3902 ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
3903 ; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
3904 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
3905 ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
3906 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
3907 ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
3908 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
3909 ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
3910 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
3911 ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
3912 ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
3913 ; GFX9-G-O0-NEXT: s_nop 0
3914 ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
3915 ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
3916 ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
3917 ; GFX9-G-O0-NEXT: s_branch .LBB1_4
3918 ; GFX9-G-O0-NEXT: .LBB1_6: ; %udiv-do-while
3919 ; GFX9-G-O0-NEXT: ; =>This Inner Loop Header: Depth=1
3920 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
3921 ; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
3922 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
3923 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
3924 ; GFX9-G-O0-NEXT: v_readlane_b32 s6, v34, 6
3925 ; GFX9-G-O0-NEXT: v_readlane_b32 s7, v34, 7
3926 ; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
3927 ; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
3928 ; GFX9-G-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
3929 ; GFX9-G-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
3930 ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
3931 ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
3932 ; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
3933 ; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
3934 ; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
3935 ; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
3936 ; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
3937 ; GFX9-G-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
3938 ; GFX9-G-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
3939 ; GFX9-G-O0-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
3940 ; GFX9-G-O0-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
3941 ; GFX9-G-O0-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
3942 ; GFX9-G-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
3943 ; GFX9-G-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
3944 ; GFX9-G-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
3945 ; GFX9-G-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
3946 ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
3947 ; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
3948 ; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
3949 ; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
3950 ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0
3951 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(18)
3952 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v2
3953 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v3
3954 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(16)
3955 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v4
3956 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v5
3957 ; GFX9-G-O0-NEXT: s_mov_b32 s8, 1
3958 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8
3959 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[20:21], v2, v[0:1]
3960 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8
3961 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[4:5], v2, v[3:4]
3962 ; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec
3963 ; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec
3964 ; GFX9-G-O0-NEXT: s_mov_b32 s9, 31
3965 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s9
3966 ; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v3, v0, v1
3967 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, 0
3968 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v4
3969 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v5
3970 ; GFX9-G-O0-NEXT: v_or_b32_e64 v7, v2, v3
3971 ; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v0, v1
3972 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr12_vgpr13 killed $exec
3973 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v14
3974 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v15
3975 ; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec
3976 ; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec
3977 ; GFX9-G-O0-NEXT: s_mov_b32 s9, 31
3978 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s9
3979 ; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v3, v0, v1
3980 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, 0
3981 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v20
3982 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v21
3983 ; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v2, v3
3984 ; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v0, v1
3985 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v12
3986 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v13
3987 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v14
3988 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v15
3989 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8
3990 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[22:23], v0, v[2:3]
3991 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8
3992 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[0:1], v0, v[12:13]
3993 ; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr2 killed $exec
3994 ; GFX9-G-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr2_vgpr3 killed $exec
3995 ; GFX9-G-O0-NEXT: s_mov_b32 s8, 31
3996 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8
3997 ; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v14, v2, v3
3998 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0
3999 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v0
4000 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v1
4001 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(10)
4002 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v28, v30
4003 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v29, v31
4004 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(8)
4005 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v32
4006 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v33
4007 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v28
4008 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v29
4009 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v22
4010 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v23
4011 ; GFX9-G-O0-NEXT: v_or_b32_e64 v0, v0, v15
4012 ; GFX9-G-O0-NEXT: v_or_b32_e64 v13, v1, v13
4013 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
4014 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v13
4015 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v20
4016 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v21
4017 ; GFX9-G-O0-NEXT: v_or3_b32 v12, v12, v14, v15
4018 ; GFX9-G-O0-NEXT: v_or3_b32 v2, v2, v3, v13
4019 ; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
4020 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v2
4021 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
4022 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v12
4023 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v13
4024 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
4025 ; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v11, s[8:9], v11, v4
4026 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v10, s[8:9], v10, v9, s[8:9]
4027 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v8, s[8:9], v8, v7, s[8:9]
4028 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v10, s[8:9], v6, v5, s[8:9]
4029 ; GFX9-G-O0-NEXT: s_mov_b32 s8, 31
4030 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s8
4031 ; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v8, v6, v10
4032 ; GFX9-G-O0-NEXT: s_mov_b32 s8, 31
4033 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s8
4034 ; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v6, v6, v10
4035 ; GFX9-G-O0-NEXT: s_mov_b32 s9, 1
4036 ; GFX9-G-O0-NEXT: s_mov_b32 s8, 0
4037 ; GFX9-G-O0-NEXT: v_and_b32_e64 v12, v8, s9
4038 ; GFX9-G-O0-NEXT: v_and_b32_e64 v10, v8, s8
4039 ; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
4040 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v10
4041 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s5
4042 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s4
4043 ; GFX9-G-O0-NEXT: ; kill: def $vgpr12_vgpr13 killed $vgpr12_vgpr13 def $vgpr12_vgpr13_vgpr14_vgpr15 killed $exec
4044 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v11
4045 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v10
4046 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v24
4047 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v25
4048 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v26
4049 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v27
4050 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v22
4051 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v23
4052 ; GFX9-G-O0-NEXT: v_and_b32_e64 v11, v8, v11
4053 ; GFX9-G-O0-NEXT: v_and_b32_e64 v10, v8, v10
4054 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v20
4055 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v21
4056 ; GFX9-G-O0-NEXT: v_and_b32_e64 v8, v6, v8
4057 ; GFX9-G-O0-NEXT: v_and_b32_e64 v6, v6, v20
4058 ; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v4, s[8:9], v4, v11
4059 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v10, s[8:9], v9, v10, s[8:9]
4060 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v9, s[8:9], v7, v8, s[8:9]
4061 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v8, s[8:9], v5, v6, s[8:9]
4062 ; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec
4063 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v10
4064 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v9
4065 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v8
4066 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v16
4067 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v17
4068 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v18
4069 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v19
4070 ; GFX9-G-O0-NEXT: s_mov_b32 s8, -1
4071 ; GFX9-G-O0-NEXT: s_mov_b32 s12, -1
4072 ; GFX9-G-O0-NEXT: s_mov_b32 s11, -1
4073 ; GFX9-G-O0-NEXT: s_mov_b32 s10, -1
4074 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, s8
4075 ; GFX9-G-O0-NEXT: v_add_co_u32_e64 v16, s[8:9], v11, v16
4076 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s12
4077 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v17, s[8:9], v10, v11, s[8:9]
4078 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s11
4079 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v19, s[8:9], v9, v10, s[8:9]
4080 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s10
4081 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v18, s[8:9], v8, v9, s[8:9]
4082 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v16
4083 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v17
4084 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v19
4085 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v18
4086 ; GFX9-G-O0-NEXT: v_or_b32_e64 v16, v16, v19
4087 ; GFX9-G-O0-NEXT: v_or_b32_e64 v18, v17, v18
4088 ; GFX9-G-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
4089 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v18
4090 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, s5
4091 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, s4
4092 ; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[16:17], v[18:19]
4093 ; GFX9-G-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
4094 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v3
4095 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v2
4096 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v1
4097 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v0
4098 ; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
4099 ; GFX9-G-O0-NEXT: s_nop 0
4100 ; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
4101 ; GFX9-G-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
4102 ; GFX9-G-O0-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
4103 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v15
4104 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v14
4105 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v13
4106 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v12
4107 ; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
4108 ; GFX9-G-O0-NEXT: s_nop 0
4109 ; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
4110 ; GFX9-G-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
4111 ; GFX9-G-O0-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
4112 ; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[4:5]
4113 ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 2
4114 ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 3
4115 ; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[4:5]
4116 ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 6
4117 ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 7
4118 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
4119 ; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
4120 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
4121 ; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
4122 ; GFX9-G-O0-NEXT: s_nop 0
4123 ; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
4124 ; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
4125 ; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
4126 ; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
4127 ; GFX9-G-O0-NEXT: s_nop 0
4128 ; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
4129 ; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
4130 ; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
4131 ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
4132 ; GFX9-G-O0-NEXT: s_nop 0
4133 ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
4134 ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
4135 ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
4136 ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
4137 ; GFX9-G-O0-NEXT: s_nop 0
4138 ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
4139 ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
4140 ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
4141 ; GFX9-G-O0-NEXT: s_andn2_b64 exec, exec, s[4:5]
4142 ; GFX9-G-O0-NEXT: s_cbranch_execnz .LBB1_6
4143 ; GFX9-G-O0-NEXT: s_branch .LBB1_1
4144 ; GFX9-G-O0-NEXT: .LBB1_7: ; %udiv-preheader
4145 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
4146 ; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
4147 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
4148 ; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
4149 ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
4150 ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
4151 ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
4152 ; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
4153 ; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
4154 ; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
4155 ; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
4156 ; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
4157 ; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
4158 ; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
4159 ; GFX9-G-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
4160 ; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
4161 ; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
4162 ; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
4163 ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
4164 ; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
4165 ; GFX9-G-O0-NEXT: s_mov_b32 s4, 64
4166 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2)
4167 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v5
4168 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v4
4169 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
4170 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v7
4171 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v6
4172 ; GFX9-G-O0-NEXT: s_mov_b32 s5, 0xffffffc0
4173 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s5
4174 ; GFX9-G-O0-NEXT: v_add_u32_e64 v4, v12, v4
4175 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s4
4176 ; GFX9-G-O0-NEXT: v_sub_u32_e64 v5, v5, v12
4177 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 0
4178 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s4
4179 ; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v12, v6
4180 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s6
4181 ; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v12, v6
4182 ; GFX9-G-O0-NEXT: v_lshrrev_b64 v[6:7], v12, v[20:21]
4183 ; GFX9-G-O0-NEXT: v_lshrrev_b64 v[25:26], v12, v[14:15]
4184 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[23:24], v5, v[20:21]
4185 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v25
4186 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v26
4187 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v23
4188 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v24
4189 ; GFX9-G-O0-NEXT: v_or_b32_e64 v13, v13, v22
4190 ; GFX9-G-O0-NEXT: v_or_b32_e64 v12, v5, v12
4191 ; GFX9-G-O0-NEXT: s_mov_b64 s[8:9], 0
4192 ; GFX9-G-O0-NEXT: v_lshrrev_b64 v[20:21], v4, v[20:21]
4193 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v20
4194 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v21
4195 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v13, s[4:5]
4196 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[4:5]
4197 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v14
4198 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v15
4199 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v13, s[6:7]
4200 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v12, v5, v12, s[6:7]
4201 ; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
4202 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v12
4203 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v6
4204 ; GFX9-G-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec
4205 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, 0
4206 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 0
4207 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v12, v12, v13, s[4:5]
4208 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5]
4209 ; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
4210 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v6
4211 ; GFX9-G-O0-NEXT: ; kill: def $vgpr4_vgpr5 killed $vgpr4_vgpr5 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec
4212 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v12
4213 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v13
4214 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v16
4215 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v17
4216 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v18
4217 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v19
4218 ; GFX9-G-O0-NEXT: s_mov_b32 s4, -1
4219 ; GFX9-G-O0-NEXT: s_mov_b32 s10, -1
4220 ; GFX9-G-O0-NEXT: s_mov_b32 s7, -1
4221 ; GFX9-G-O0-NEXT: s_mov_b32 s6, -1
4222 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, s4
4223 ; GFX9-G-O0-NEXT: v_add_co_u32_e64 v15, s[4:5], v15, v16
4224 ; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
4225 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s10
4226 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v14, s[4:5], v14, v15, s[4:5]
4227 ; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
4228 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s7
4229 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v13, s[4:5], v13, v14, s[4:5]
4230 ; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
4231 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s6
4232 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v12, s[4:5], v12, v13, s[4:5]
4233 ; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
4234 ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], s[8:9]
4235 ; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[8:9]
4236 ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s8, 6
4237 ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s9, 7
4238 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
4239 ; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
4240 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
4241 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s7
4242 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s6
4243 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s5
4244 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s4
4245 ; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
4246 ; GFX9-G-O0-NEXT: s_nop 0
4247 ; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
4248 ; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
4249 ; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
4250 ; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
4251 ; GFX9-G-O0-NEXT: s_nop 0
4252 ; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
4253 ; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
4254 ; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
4255 ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
4256 ; GFX9-G-O0-NEXT: s_nop 0
4257 ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
4258 ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
4259 ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
4260 ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
4261 ; GFX9-G-O0-NEXT: s_nop 0
4262 ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
4263 ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
4264 ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
4265 ; GFX9-G-O0-NEXT: s_branch .LBB1_6
4266 ; GFX9-G-O0-NEXT: .LBB1_8: ; %udiv-bb1
4267 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
4268 ; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
4269 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
4270 ; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
4271 ; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
4272 ; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
4273 ; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
4274 ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
4275 ; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
4276 ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
4277 ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
4278 ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0
4279 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 1
4280 ; GFX9-G-O0-NEXT: s_mov_b32 s10, 0
4281 ; GFX9-G-O0-NEXT: s_mov_b32 s9, 0
4282 ; GFX9-G-O0-NEXT: s_mov_b32 s8, 0
4283 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s6
4284 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
4285 ; GFX9-G-O0-NEXT: v_add_co_u32_e64 v4, s[6:7], v1, v4
4286 ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
4287 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s10
4288 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1)
4289 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v5, s[6:7], v3, v5, s[6:7]
4290 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s9
4291 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v7, s[6:7], v2, v3, s[6:7]
4292 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8
4293 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v6, s[6:7], v0, v2, s[6:7]
4294 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v4
4295 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v5
4296 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v7
4297 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v6
4298 ; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
4299 ; GFX9-G-O0-NEXT: s_nop 0
4300 ; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
4301 ; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
4302 ; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
4303 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 0x7f
4304 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6
4305 ; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v3, s[6:7], v0, v1
4306 ; GFX9-G-O0-NEXT: s_mov_b32 s7, 64
4307 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v9
4308 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v8
4309 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 0xffffffc0
4310 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6
4311 ; GFX9-G-O0-NEXT: v_add_u32_e64 v2, v3, v0
4312 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7
4313 ; GFX9-G-O0-NEXT: v_sub_u32_e64 v8, v0, v3
4314 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 0
4315 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7
4316 ; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[8:9], v3, v0
4317 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6
4318 ; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v3, v0
4319 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[0:1], v3, v[12:13]
4320 ; GFX9-G-O0-NEXT: v_lshrrev_b64 v[17:18], v8, v[12:13]
4321 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[15:16], v3, v[10:11]
4322 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v17
4323 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v18
4324 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v15
4325 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v16
4326 ; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v9, v14
4327 ; GFX9-G-O0-NEXT: v_or_b32_e64 v3, v3, v8
4328 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[12:13], v2, v[12:13]
4329 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v0
4330 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v1
4331 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, 0
4332 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, 0
4333 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[8:9]
4334 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[8:9]
4335 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
4336 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v2
4337 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v12
4338 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v13
4339 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[8:9]
4340 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[8:9]
4341 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10
4342 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v11
4343 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[6:7]
4344 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[6:7]
4345 ; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
4346 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v2
4347 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
4348 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v8
4349 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v9
4350 ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
4351 ; GFX9-G-O0-NEXT: s_nop 0
4352 ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
4353 ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
4354 ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
4355 ; GFX9-G-O0-NEXT: s_mov_b64 s[8:9], s[4:5]
4356 ; GFX9-G-O0-NEXT: s_mov_b64 s[10:11], s[4:5]
4357 ; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v7
4358 ; GFX9-G-O0-NEXT: v_or_b32_e64 v6, v5, v6
4359 ; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
4360 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v6
4361 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s5
4362 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s4
4363 ; GFX9-G-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[4:5], v[6:7]
4364 ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
4365 ; GFX9-G-O0-NEXT: s_nop 0
4366 ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
4367 ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
4368 ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
4369 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8
4370 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s9
4371 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s10
4372 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s11
4373 ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
4374 ; GFX9-G-O0-NEXT: s_nop 0
4375 ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
4376 ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
4377 ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
4378 ; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], exec
4379 ; GFX9-G-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5]
4380 ; GFX9-G-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7]
4381 ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 4
4382 ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 5
4383 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
4384 ; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
4385 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
4386 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5]
4387 ; GFX9-G-O0-NEXT: s_cbranch_execz .LBB1_5
4388 ; GFX9-G-O0-NEXT: s_branch .LBB1_7
4389 ; GFX9-G-O0-NEXT: .LBB1_9: ; %udiv-end
4390 ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
4391 ; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
4392 ; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
4393 ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
4394 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
4395 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v3
4396 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2)
4397 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v4
4398 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1)
4399 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v5
4400 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
4401 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v6
4402 ; GFX9-G-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
4403 ; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
4404 ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5]
4405 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
4406 ; GFX9-G-O0-NEXT: s_setpc_b64 s[30:31]
; Expected optimized GlobalISel output for v_sdiv_i128_v_pow2k.
; The sequence treats v0 (least significant) .. v3 (most significant) as one
; 128-bit signed value, adds a sign-dependent bias, then shifts the sum right
; arithmetically by 33 bits (one dword plus one bit), leaving the result in
; v0..v3. This is consistent with a signed divide by 2^33.
; NOTE(review): the divisor itself is in the IR, which is not visible here - confirm.
; NOTE(review): these check lines look tool-generated; hand-written comments
; may be dropped on regeneration - confirm before relying on them.
4490 ; GFX9-G-LABEL: v_sdiv_i128_v_pow2k:
4491 ; GFX9-G: ; %bb.0:
4492 ; GFX9-G-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; v4 = v3 >> 31 (arithmetic): all ones for a negative input, zero otherwise.
; v[4:5] then holds that mask twice; a logical shift right by 31 leaves
; 0x1_FFFFFFFF (2^33 - 1) for negative inputs and 0 otherwise.
4493 ; GFX9-G-NEXT: v_ashrrev_i32_e32 v4, 31, v3
4494 ; GFX9-G-NEXT: v_mov_b32_e32 v5, v4
4495 ; GFX9-G-NEXT: v_lshrrev_b64 v[4:5], 31, v[4:5]
; 128-bit add of the bias with the carry chained through vcc. The low-dword
; sum written to v0 is overwritten below before being read, so only its
; carry-out matters. Sum dwords 1, 2, 3 end up in v4, v1, v2.
4496 ; GFX9-G-NEXT: v_add_co_u32_e32 v0, vcc, v0, v4
4497 ; GFX9-G-NEXT: v_addc_co_u32_e32 v4, vcc, v1, v5, vcc
4498 ; GFX9-G-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
4499 ; GFX9-G-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v3, vcc
; Funnel the sum right by 33: v[0:1] = (dwords 2..3) << 31, then bit 31..1 of
; dword 1 is OR-ed into the low result dword.
4500 ; GFX9-G-NEXT: v_lshlrev_b64 v[0:1], 31, v[1:2]
4501 ; GFX9-G-NEXT: v_lshrrev_b32_e32 v3, 1, v4
4502 ; GFX9-G-NEXT: v_or_b32_e32 v0, v3, v0
; Top two result dwords: v3 = sign fill, v2 = top sum dword >> 1 (arithmetic).
; v3 must be computed first because it reads v2 before v2 is shifted.
4503 ; GFX9-G-NEXT: v_ashrrev_i32_e32 v3, 31, v2
4504 ; GFX9-G-NEXT: v_ashrrev_i32_e32 v2, 1, v2
; Jump to the address held in s[30:31] (presumably the return address - confirm).
4505 ; GFX9-G-NEXT: s_setpc_b64 s[30:31]
; Expected -O0 GlobalISel output for v_sdiv_i128_v_pow2k.
; Same computation as the optimized sequence for this function: add a
; sign-dependent bias to the 128-bit value in v0..v3 (v0 least significant),
; then shift right arithmetically by 33 bits. Here every shift amount and zero
; is first placed in an SGPR and copied into a VGPR, and the carry chain lives
; in s[6:7] rather than vcc.
; NOTE(review): consistent with a signed divide by 2^33; the divisor is in the
; IR, which is not visible here - confirm.
4507 ; GFX9-G-O0-LABEL: v_sdiv_i128_v_pow2k:
4508 ; GFX9-G-O0: ; %bb.0:
4509 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; Save the low input dword in v4 so v0 can be reused as a scratch register.
4510 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v0
; v0 = v3 >> 31 (arithmetic): sign mask of the most significant dword.
4511 ; GFX9-G-O0-NEXT: s_mov_b32 s4, 31
4512 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s4
4513 ; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v0, v0, v3
; v[5:6] = sign mask in both halves; logical shift right by 31 gives the bias
; in v[6:7] (2^33 - 1 for negative inputs, 0 otherwise).
4514 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v0
4515 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v0
4516 ; GFX9-G-O0-NEXT: s_mov_b32 s4, 31
4517 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s4
4518 ; GFX9-G-O0-NEXT: v_lshrrev_b64 v[6:7], v0, v[5:6]
4519 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v6
4520 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v7
; s8 and s5 hold the zero upper dwords of the bias.
4521 ; GFX9-G-O0-NEXT: s_mov_b32 s8, 0
4522 ; GFX9-G-O0-NEXT: s_mov_b32 s5, 0
; 128-bit add with the carry in s[6:7]. Sum dwords 1, 2, 3 land in v1, v5, v4;
; the low-dword sum in v4 is overwritten by the last add, so only its carry
; is used.
4523 ; GFX9-G-O0-NEXT: v_add_co_u32_e64 v4, s[6:7], v4, v5
4524 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v1, s[6:7], v1, v0, s[6:7]
4525 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8
4526 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v5, s[6:7], v2, v0, s[6:7]
4527 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s5
4528 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v4, s[6:7], v3, v0, s[6:7]
; Pair up sum dwords 2 and 3 as the 64-bit value v[5:6].
4529 ; GFX9-G-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
4530 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v4
; v[0:1] = (sum dword 1 >> 1), zero-extended to 64 bits.
4531 ; GFX9-G-O0-NEXT: s_mov_b32 s5, 1
4532 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s5
4533 ; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v0, v0, v1
4534 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, 0
; s4 still holds 31 from the earlier s_mov_b32; shift the upper pair left by
; 31 and OR it in to form result dwords 0 and 1.
4535 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s4
4536 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[5:6], v2, v[5:6]
4537 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v5
4538 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v6
4539 ; GFX9-G-O0-NEXT: v_or_b32_e64 v0, v0, v3
4540 ; GFX9-G-O0-NEXT: v_or_b32_e64 v1, v1, v2
; Result dword 3 = sign fill of the top sum dword (v4 >> 31, arithmetic).
4541 ; GFX9-G-O0-NEXT: s_mov_b32 s4, 31
4542 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s4
4543 ; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v3, v2, v4
; Result dword 2 = top sum dword >> 1 (arithmetic).
4544 ; GFX9-G-O0-NEXT: s_mov_b32 s4, 1
4545 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s4
4546 ; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v2, v2, v4
; Jump to the address held in s[30:31] (presumably the return address - confirm).
4547 ; GFX9-G-O0-NEXT: s_setpc_b64 s[30:31]
; Expected optimized GlobalISel output for v_udiv_i128_v_pow2k.
; The sequence shifts the 128-bit value in v0..v3 (v3 most significant, as the
; zero fill goes there) right logically by 33 bits and leaves the result in
; v0..v3. The incoming v0 is never read, since a 33-bit shift discards the
; whole low dword.
; NOTE(review): consistent with an unsigned divide by 2^33; the divisor is in
; the IR, which is not visible here - confirm.
4595 ; GFX9-G-LABEL: v_udiv_i128_v_pow2k:
4596 ; GFX9-G: ; %bb.0:
4597 ; GFX9-G-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; Keep input dword 1 in v4 because v[0:1] is about to be overwritten.
4598 ; GFX9-G-NEXT: v_mov_b32_e32 v4, v1
; v[0:1] = (input dwords 2..3) << 31; then OR (dword 1 >> 1) into the low half.
4599 ; GFX9-G-NEXT: v_lshlrev_b64 v[0:1], 31, v[2:3]
4600 ; GFX9-G-NEXT: v_lshrrev_b32_e32 v2, 1, v4
4601 ; GFX9-G-NEXT: v_or_b32_e32 v0, v2, v0
; Result dword 2 = input dword 3 >> 1; result dword 3 is zero-filled.
4602 ; GFX9-G-NEXT: v_lshrrev_b32_e32 v2, 1, v3
4603 ; GFX9-G-NEXT: v_mov_b32_e32 v3, 0
; Jump to the address held in s[30:31] (presumably the return address - confirm).
4604 ; GFX9-G-NEXT: s_setpc_b64 s[30:31]
; Expected -O0 GlobalISel output for v_udiv_i128_v_pow2k.
; Same computation as the optimized sequence for this function: a logical
; right shift of the 128-bit value in v0..v3 by 33 bits. Here each shift amount
; and the final zero are first placed in s4 and copied into a VGPR. The
; incoming v0 is never read.
; NOTE(review): consistent with an unsigned divide by 2^33; the divisor is in
; the IR, which is not visible here - confirm.
4606 ; GFX9-G-O0-LABEL: v_udiv_i128_v_pow2k:
4607 ; GFX9-G-O0: ; %bb.0:
4608 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; Copy input dwords 2 and 3 into the 64-bit pair v[4:5].
4609 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v2
4610 ; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
4611 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v3
; v[0:1] = (input dword 1 >> 1), zero-extended to 64 bits.
4612 ; GFX9-G-O0-NEXT: s_mov_b32 s4, 1
4613 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s4
4614 ; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v0, v0, v1
4615 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, 0
; v[5:6] = v[4:5] << 31, then OR both halves in to form result dwords 0 and 1.
4616 ; GFX9-G-O0-NEXT: s_mov_b32 s4, 31
4617 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s4
4618 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[5:6], v2, v[4:5]
4619 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v5
4620 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v6
4621 ; GFX9-G-O0-NEXT: v_or_b32_e64 v0, v0, v4
4622 ; GFX9-G-O0-NEXT: v_or_b32_e64 v1, v1, v2
; Result dword 2 = input dword 3 >> 1 (logical).
4623 ; GFX9-G-O0-NEXT: s_mov_b32 s4, 1
4624 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s4
4625 ; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v2, v2, v3
; Result dword 3 is zero-filled.
4626 ; GFX9-G-O0-NEXT: s_mov_b32 s4, 0
4627 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s4
; Jump to the address held in s[30:31] (presumably the return address - confirm).
4628 ; GFX9-G-O0-NEXT: s_setpc_b64 s[30:31]