1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 2; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GFX9 %s 3; RUN: llc -O0 -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GFX9-O0 %s 4 5; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GFX9-G %s 6; RUN: llc -O0 -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GFX9-G-O0 %s 7 8define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { 9; GFX9-LABEL: v_sdiv_i128_vv: 10; GFX9: ; %bb.0: ; %_udiv-special-cases 11; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12; GFX9-NEXT: v_sub_co_u32_e32 v8, vcc, 0, v0 13; GFX9-NEXT: v_subb_co_u32_e32 v9, vcc, 0, v1, vcc 14; GFX9-NEXT: v_subb_co_u32_e32 v10, vcc, 0, v2, vcc 15; GFX9-NEXT: v_subb_co_u32_e32 v11, vcc, 0, v3, vcc 16; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[2:3] 17; GFX9-NEXT: v_ashrrev_i32_e32 v16, 31, v3 18; GFX9-NEXT: v_cndmask_b32_e32 v9, v1, v9, vcc 19; GFX9-NEXT: v_cndmask_b32_e32 v8, v0, v8, vcc 20; GFX9-NEXT: v_cndmask_b32_e32 v11, v3, v11, vcc 21; GFX9-NEXT: v_cndmask_b32_e32 v10, v2, v10, vcc 22; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, 0, v4 23; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, 0, v5, vcc 24; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, 0, v6, vcc 25; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, 0, v7, vcc 26; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[6:7] 27; GFX9-NEXT: v_ashrrev_i32_e32 v17, 31, v7 28; GFX9-NEXT: v_cndmask_b32_e32 v20, v5, v1, vcc 29; GFX9-NEXT: v_cndmask_b32_e32 v21, v4, v0, vcc 30; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc 31; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc 32; GFX9-NEXT: v_or_b32_e32 v3, v20, v1 33; GFX9-NEXT: v_or_b32_e32 v2, v21, v0 34; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3] 35; GFX9-NEXT: v_or_b32_e32 v3, v9, v11 36; GFX9-NEXT: v_or_b32_e32 v2, v8, v10 37; GFX9-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, 
v[2:3] 38; GFX9-NEXT: v_ffbh_u32_e32 v2, v0 39; GFX9-NEXT: v_add_u32_e32 v2, 32, v2 40; GFX9-NEXT: v_ffbh_u32_e32 v3, v1 41; GFX9-NEXT: v_min_u32_e32 v2, v2, v3 42; GFX9-NEXT: v_ffbh_u32_e32 v3, v21 43; GFX9-NEXT: v_add_u32_e32 v3, 32, v3 44; GFX9-NEXT: v_ffbh_u32_e32 v4, v20 45; GFX9-NEXT: v_min_u32_e32 v3, v3, v4 46; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 47; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, 64, v3 48; GFX9-NEXT: v_addc_co_u32_e64 v4, s[6:7], 0, 0, vcc 49; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] 50; GFX9-NEXT: v_ffbh_u32_e32 v6, v11 51; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc 52; GFX9-NEXT: v_ffbh_u32_e32 v3, v10 53; GFX9-NEXT: v_add_u32_e32 v3, 32, v3 54; GFX9-NEXT: v_min_u32_e32 v3, v3, v6 55; GFX9-NEXT: v_ffbh_u32_e32 v6, v8 56; GFX9-NEXT: v_add_u32_e32 v6, 32, v6 57; GFX9-NEXT: v_ffbh_u32_e32 v7, v9 58; GFX9-NEXT: v_min_u32_e32 v6, v6, v7 59; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc 60; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, 64, v6 61; GFX9-NEXT: v_addc_co_u32_e64 v7, s[6:7], 0, 0, vcc 62; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] 63; GFX9-NEXT: v_mov_b32_e32 v5, 0 64; GFX9-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc 65; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, 0, vcc 66; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v3 67; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v4, v7, vcc 68; GFX9-NEXT: v_subbrev_co_u32_e32 v4, vcc, 0, v5, vcc 69; GFX9-NEXT: v_subbrev_co_u32_e32 v5, vcc, 0, v5, vcc 70; GFX9-NEXT: s_mov_b64 s[6:7], 0x7f 71; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3] 72; GFX9-NEXT: v_mov_b32_e32 v18, v16 73; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 74; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] 75; GFX9-NEXT: v_mov_b32_e32 v19, v17 76; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 77; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] 78; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc 79; GFX9-NEXT: v_and_b32_e32 v6, 1, v6 80; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6 81; GFX9-NEXT: v_xor_b32_e32 v6, 0x7f, v2 82; GFX9-NEXT: v_or_b32_e32 v7, v3, v5 83; GFX9-NEXT: 
v_or_b32_e32 v6, v6, v4 84; GFX9-NEXT: s_or_b64 s[4:5], s[4:5], vcc 85; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] 86; GFX9-NEXT: s_xor_b64 s[6:7], s[4:5], -1 87; GFX9-NEXT: v_cndmask_b32_e64 v13, v11, 0, s[4:5] 88; GFX9-NEXT: v_cndmask_b32_e64 v12, v10, 0, s[4:5] 89; GFX9-NEXT: v_cndmask_b32_e64 v7, v9, 0, s[4:5] 90; GFX9-NEXT: v_cndmask_b32_e64 v6, v8, 0, s[4:5] 91; GFX9-NEXT: s_and_b64 s[4:5], s[6:7], vcc 92; GFX9-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] 93; GFX9-NEXT: s_cbranch_execz .LBB0_6 94; GFX9-NEXT: ; %bb.1: ; %udiv-bb1 95; GFX9-NEXT: v_add_co_u32_e32 v22, vcc, 1, v2 96; GFX9-NEXT: v_addc_co_u32_e32 v23, vcc, 0, v3, vcc 97; GFX9-NEXT: v_addc_co_u32_e32 v24, vcc, 0, v4, vcc 98; GFX9-NEXT: v_sub_u32_e32 v7, 0x7f, v2 99; GFX9-NEXT: v_addc_co_u32_e32 v25, vcc, 0, v5, vcc 100; GFX9-NEXT: v_sub_u32_e32 v12, 64, v7 101; GFX9-NEXT: v_or_b32_e32 v4, v23, v25 102; GFX9-NEXT: v_or_b32_e32 v3, v22, v24 103; GFX9-NEXT: v_lshlrev_b64 v[5:6], v7, v[10:11] 104; GFX9-NEXT: v_lshrrev_b64 v[12:13], v12, v[8:9] 105; GFX9-NEXT: v_sub_u32_e32 v2, 63, v2 106; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[3:4] 107; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, v[8:9] 108; GFX9-NEXT: v_or_b32_e32 v4, v6, v13 109; GFX9-NEXT: v_or_b32_e32 v5, v5, v12 110; GFX9-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v7 111; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5] 112; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5] 113; GFX9-NEXT: v_lshlrev_b64 v[4:5], v7, v[8:9] 114; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7 115; GFX9-NEXT: v_mov_b32_e32 v6, 0 116; GFX9-NEXT: v_mov_b32_e32 v12, 0 117; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[6:7] 118; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[6:7] 119; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, v5, s[4:5] 120; GFX9-NEXT: v_mov_b32_e32 v7, 0 121; GFX9-NEXT: v_mov_b32_e32 v13, 0 122; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5] 123; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc 124; GFX9-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 125; GFX9-NEXT: s_cbranch_execz .LBB0_5 126; GFX9-NEXT: ; 
%bb.2: ; %udiv-preheader 127; GFX9-NEXT: v_sub_u32_e32 v12, 64, v22 128; GFX9-NEXT: v_lshrrev_b64 v[6:7], v22, v[8:9] 129; GFX9-NEXT: v_lshlrev_b64 v[12:13], v12, v[10:11] 130; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v22 131; GFX9-NEXT: v_or_b32_e32 v12, v6, v12 132; GFX9-NEXT: v_subrev_u32_e32 v6, 64, v22 133; GFX9-NEXT: v_or_b32_e32 v13, v7, v13 134; GFX9-NEXT: v_lshrrev_b64 v[6:7], v6, v[10:11] 135; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v22 136; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v13, vcc 137; GFX9-NEXT: v_cndmask_b32_e64 v9, v7, v9, s[4:5] 138; GFX9-NEXT: v_cndmask_b32_e32 v12, v6, v12, vcc 139; GFX9-NEXT: v_lshrrev_b64 v[6:7], v22, v[10:11] 140; GFX9-NEXT: v_cndmask_b32_e64 v8, v12, v8, s[4:5] 141; GFX9-NEXT: v_cndmask_b32_e32 v11, 0, v7, vcc 142; GFX9-NEXT: v_cndmask_b32_e32 v10, 0, v6, vcc 143; GFX9-NEXT: v_add_co_u32_e32 v26, vcc, -1, v21 144; GFX9-NEXT: v_addc_co_u32_e32 v27, vcc, -1, v20, vcc 145; GFX9-NEXT: v_addc_co_u32_e32 v28, vcc, -1, v0, vcc 146; GFX9-NEXT: v_mov_b32_e32 v14, 0 147; GFX9-NEXT: v_mov_b32_e32 v12, 0 148; GFX9-NEXT: v_addc_co_u32_e32 v29, vcc, -1, v1, vcc 149; GFX9-NEXT: s_mov_b64 s[4:5], 0 150; GFX9-NEXT: v_mov_b32_e32 v15, 0 151; GFX9-NEXT: v_mov_b32_e32 v13, 0 152; GFX9-NEXT: v_mov_b32_e32 v7, 0 153; GFX9-NEXT: .LBB0_3: ; %udiv-do-while 154; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 155; GFX9-NEXT: v_lshlrev_b64 v[30:31], 1, v[4:5] 156; GFX9-NEXT: v_lshrrev_b32_e32 v6, 31, v5 157; GFX9-NEXT: v_or_b32_e32 v4, v14, v30 158; GFX9-NEXT: v_lshrrev_b32_e32 v14, 31, v9 159; GFX9-NEXT: v_lshlrev_b64 v[8:9], 1, v[8:9] 160; GFX9-NEXT: v_or_b32_e32 v5, v15, v31 161; GFX9-NEXT: v_lshlrev_b64 v[10:11], 1, v[10:11] 162; GFX9-NEXT: v_lshrrev_b32_e32 v15, 31, v3 163; GFX9-NEXT: v_or_b32_e32 v8, v8, v15 164; GFX9-NEXT: v_or_b32_e32 v10, v10, v14 165; GFX9-NEXT: v_sub_co_u32_e32 v14, vcc, v26, v8 166; GFX9-NEXT: v_subb_co_u32_e32 v14, vcc, v27, v9, vcc 167; GFX9-NEXT: v_subb_co_u32_e32 v14, vcc, v28, v10, vcc 168; GFX9-NEXT: v_subb_co_u32_e32 
v14, vcc, v29, v11, vcc 169; GFX9-NEXT: v_ashrrev_i32_e32 v30, 31, v14 170; GFX9-NEXT: v_and_b32_e32 v14, v30, v21 171; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 172; GFX9-NEXT: v_sub_co_u32_e32 v8, vcc, v8, v14 173; GFX9-NEXT: v_and_b32_e32 v14, v30, v20 174; GFX9-NEXT: v_subb_co_u32_e32 v9, vcc, v9, v14, vcc 175; GFX9-NEXT: v_or3_b32 v2, v2, v6, v12 176; GFX9-NEXT: v_and_b32_e32 v6, v30, v0 177; GFX9-NEXT: v_and_b32_e32 v14, v30, v1 178; GFX9-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v6, vcc 179; GFX9-NEXT: v_subb_co_u32_e32 v11, vcc, v11, v14, vcc 180; GFX9-NEXT: v_add_co_u32_e32 v22, vcc, -1, v22 181; GFX9-NEXT: v_addc_co_u32_e32 v23, vcc, -1, v23, vcc 182; GFX9-NEXT: v_addc_co_u32_e32 v24, vcc, -1, v24, vcc 183; GFX9-NEXT: v_addc_co_u32_e32 v25, vcc, -1, v25, vcc 184; GFX9-NEXT: v_or_b32_e32 v14, v22, v24 185; GFX9-NEXT: v_or_b32_e32 v15, v23, v25 186; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] 187; GFX9-NEXT: v_and_b32_e32 v6, 1, v30 188; GFX9-NEXT: v_mov_b32_e32 v15, v7 189; GFX9-NEXT: v_or3_b32 v3, v3, 0, v13 190; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 191; GFX9-NEXT: v_mov_b32_e32 v14, v6 192; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 193; GFX9-NEXT: s_cbranch_execnz .LBB0_3 194; GFX9-NEXT: ; %bb.4: ; %Flow 195; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 196; GFX9-NEXT: .LBB0_5: ; %Flow2 197; GFX9-NEXT: s_or_b64 exec, exec, s[6:7] 198; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[4:5] 199; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 200; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v5 201; GFX9-NEXT: v_or3_b32 v13, v3, 0, v13 202; GFX9-NEXT: v_or3_b32 v12, v2, v4, v12 203; GFX9-NEXT: v_or_b32_e32 v7, v7, v1 204; GFX9-NEXT: v_or_b32_e32 v6, v6, v0 205; GFX9-NEXT: .LBB0_6: ; %Flow3 206; GFX9-NEXT: s_or_b64 exec, exec, s[8:9] 207; GFX9-NEXT: v_xor_b32_e32 v2, v17, v16 208; GFX9-NEXT: v_xor_b32_e32 v3, v19, v18 209; GFX9-NEXT: v_xor_b32_e32 v0, v6, v2 210; GFX9-NEXT: v_xor_b32_e32 v1, v7, v3 211; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2 212; GFX9-NEXT: v_xor_b32_e32 v5, v12, 
v2 213; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc 214; GFX9-NEXT: v_xor_b32_e32 v4, v13, v3 215; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v5, v2, vcc 216; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v4, v3, vcc 217; GFX9-NEXT: s_setpc_b64 s[30:31] 218; 219; GFX9-O0-LABEL: v_sdiv_i128_vv: 220; GFX9-O0: ; %bb.0: ; %_udiv-special-cases 221; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 222; GFX9-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 223; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill 224; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] 225; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill 226; GFX9-O0-NEXT: v_mov_b32_e32 v20, v6 227; GFX9-O0-NEXT: v_mov_b32_e32 v7, v4 228; GFX9-O0-NEXT: v_mov_b32_e32 v9, v2 229; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 230; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload 231; GFX9-O0-NEXT: ; implicit-def: $sgpr4 232; GFX9-O0-NEXT: ; implicit-def: $sgpr4 233; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec 234; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 235; GFX9-O0-NEXT: v_mov_b32_e32 v21, v0 236; GFX9-O0-NEXT: ; implicit-def: $sgpr4 237; GFX9-O0-NEXT: ; implicit-def: $sgpr4 238; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec 239; GFX9-O0-NEXT: v_mov_b32_e32 v8, v5 240; GFX9-O0-NEXT: ; implicit-def: $sgpr4 241; GFX9-O0-NEXT: ; implicit-def: $sgpr4 242; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec 243; GFX9-O0-NEXT: v_mov_b32_e32 v10, v3 244; GFX9-O0-NEXT: ; implicit-def: $sgpr4 245; GFX9-O0-NEXT: ; implicit-def: $sgpr4 246; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 247; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 248; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 249; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 250; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 251; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 252; GFX9-O0-NEXT: 
v_mov_b32_e32 v1, v2 253; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 254; GFX9-O0-NEXT: v_mov_b32_e32 v13, v9 255; GFX9-O0-NEXT: v_mov_b32_e32 v19, v10 256; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 257; GFX9-O0-NEXT: ; implicit-def: $vgpr30 : SGPR spill to VGPR lane 258; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 0 259; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 1 260; GFX9-O0-NEXT: s_mov_b32 s10, s6 261; GFX9-O0-NEXT: v_writelane_b32 v30, s10, 2 262; GFX9-O0-NEXT: s_mov_b32 s11, s7 263; GFX9-O0-NEXT: v_writelane_b32 v30, s11, 3 264; GFX9-O0-NEXT: v_sub_co_u32_e32 v5, vcc, s10, v1 265; GFX9-O0-NEXT: v_mov_b32_e32 v0, s11 266; GFX9-O0-NEXT: v_subb_co_u32_e32 v3, vcc, v0, v2, vcc 267; GFX9-O0-NEXT: v_mov_b32_e32 v0, s10 268; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v0, v13, vcc 269; GFX9-O0-NEXT: v_mov_b32_e32 v0, s11 270; GFX9-O0-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v19, vcc 271; GFX9-O0-NEXT: ; implicit-def: $sgpr4 272; GFX9-O0-NEXT: ; implicit-def: $sgpr4 273; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec 274; GFX9-O0-NEXT: v_mov_b32_e32 v6, v3 275; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6 276; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[6:7] 277; GFX9-O0-NEXT: v_cmp_lt_i64_e64 s[4:5], v[9:10], s[4:5] 278; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[4:5] 279; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 280; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] 281; GFX9-O0-NEXT: ; implicit-def: $sgpr8 282; GFX9-O0-NEXT: ; implicit-def: $sgpr8 283; GFX9-O0-NEXT: v_mov_b32_e32 v15, v2 284; GFX9-O0-NEXT: v_mov_b32_e32 v16, v3 285; GFX9-O0-NEXT: ; implicit-def: $sgpr8 286; GFX9-O0-NEXT: ; implicit-def: $sgpr8 287; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 288; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0 289; GFX9-O0-NEXT: v_mov_b32_e32 v0, v5 290; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v19, v0, s[4:5] 291; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4 292; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v13, v0, s[4:5] 293; GFX9-O0-NEXT: ; implicit-def: $sgpr4 294; GFX9-O0-NEXT: ; 
implicit-def: $sgpr4 295; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0 296; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1 297; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7 298; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 killed $vgpr7_vgpr8 killed $exec 299; GFX9-O0-NEXT: v_mov_b32_e32 v12, v20 300; GFX9-O0-NEXT: v_mov_b32_e32 v14, v21 301; GFX9-O0-NEXT: v_sub_co_u32_e32 v17, vcc, s10, v6 302; GFX9-O0-NEXT: v_mov_b32_e32 v7, s11 303; GFX9-O0-NEXT: v_subb_co_u32_e32 v9, vcc, v7, v8, vcc 304; GFX9-O0-NEXT: v_mov_b32_e32 v7, s10 305; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v7, v12, vcc 306; GFX9-O0-NEXT: v_mov_b32_e32 v7, s11 307; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v14, vcc 308; GFX9-O0-NEXT: ; implicit-def: $sgpr4 309; GFX9-O0-NEXT: ; implicit-def: $sgpr4 310; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec 311; GFX9-O0-NEXT: v_mov_b32_e32 v18, v9 312; GFX9-O0-NEXT: v_mov_b32_e32 v9, v18 313; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[6:7] 314; GFX9-O0-NEXT: v_cmp_lt_i64_e64 s[4:5], v[20:21], s[4:5] 315; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, v8, v9, s[4:5] 316; GFX9-O0-NEXT: v_mov_b32_e32 v8, v17 317; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v8, s[4:5] 318; GFX9-O0-NEXT: ; implicit-def: $sgpr8 319; GFX9-O0-NEXT: ; implicit-def: $sgpr8 320; GFX9-O0-NEXT: v_mov_b32_e32 v17, v6 321; GFX9-O0-NEXT: v_mov_b32_e32 v18, v9 322; GFX9-O0-NEXT: ; implicit-def: $sgpr8 323; GFX9-O0-NEXT: ; implicit-def: $sgpr8 324; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec 325; GFX9-O0-NEXT: v_mov_b32_e32 v11, v7 326; GFX9-O0-NEXT: v_mov_b32_e32 v7, v11 327; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v14, v7, s[4:5] 328; GFX9-O0-NEXT: v_mov_b32_e32 v7, v10 329; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v12, v7, s[4:5] 330; GFX9-O0-NEXT: ; implicit-def: $sgpr4 331; GFX9-O0-NEXT: ; implicit-def: $sgpr4 332; GFX9-O0-NEXT: v_mov_b32_e32 v10, v7 333; GFX9-O0-NEXT: v_mov_b32_e32 v11, v8 334; GFX9-O0-NEXT: v_xor_b32_e64 v14, v14, v19 335; GFX9-O0-NEXT: v_xor_b32_e64 v12, 
v12, v13 336; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec 337; GFX9-O0-NEXT: v_mov_b32_e32 v13, v14 338; GFX9-O0-NEXT: s_mov_b32 s4, 63 339; GFX9-O0-NEXT: v_ashrrev_i64 v[12:13], s4, v[12:13] 340; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill 341; GFX9-O0-NEXT: s_nop 0 342; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill 343; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill 344; GFX9-O0-NEXT: s_nop 0 345; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill 346; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11 347; GFX9-O0-NEXT: v_mov_b32_e32 v12, v10 348; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill 349; GFX9-O0-NEXT: s_nop 0 350; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill 351; GFX9-O0-NEXT: v_mov_b32_e32 v12, v17 352; GFX9-O0-NEXT: v_mov_b32_e32 v13, v18 353; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill 354; GFX9-O0-NEXT: s_nop 0 355; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill 356; GFX9-O0-NEXT: v_mov_b32_e32 v13, v5 357; GFX9-O0-NEXT: v_mov_b32_e32 v12, v4 358; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill 359; GFX9-O0-NEXT: s_nop 0 360; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill 361; GFX9-O0-NEXT: v_mov_b32_e32 v12, v15 362; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16 363; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill 364; GFX9-O0-NEXT: s_nop 0 365; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill 366; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11 367; GFX9-O0-NEXT: v_mov_b32_e32 v12, v18 368; GFX9-O0-NEXT: v_or_b32_e64 v14, v12, v13 369; GFX9-O0-NEXT: v_mov_b32_e32 v13, 
v10 370; GFX9-O0-NEXT: v_mov_b32_e32 v12, v17 371; GFX9-O0-NEXT: v_or_b32_e64 v12, v12, v13 372; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec 373; GFX9-O0-NEXT: v_mov_b32_e32 v13, v14 374; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[12:13], s[6:7] 375; GFX9-O0-NEXT: v_mov_b32_e32 v13, v5 376; GFX9-O0-NEXT: v_mov_b32_e32 v12, v16 377; GFX9-O0-NEXT: v_or_b32_e64 v14, v12, v13 378; GFX9-O0-NEXT: v_mov_b32_e32 v13, v4 379; GFX9-O0-NEXT: v_mov_b32_e32 v12, v15 380; GFX9-O0-NEXT: v_or_b32_e64 v12, v12, v13 381; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec 382; GFX9-O0-NEXT: v_mov_b32_e32 v13, v14 383; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[12:13], s[6:7] 384; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] 385; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] 386; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[10:11], s[8:9] 387; GFX9-O0-NEXT: v_ffbh_u32_e64 v7, v7 388; GFX9-O0-NEXT: s_mov_b32 s13, 32 389; GFX9-O0-NEXT: v_add_u32_e64 v7, v7, s13 390; GFX9-O0-NEXT: v_ffbh_u32_e64 v8, v8 391; GFX9-O0-NEXT: v_min_u32_e64 v7, v7, v8 392; GFX9-O0-NEXT: s_mov_b32 s12, 0 393; GFX9-O0-NEXT: ; implicit-def: $sgpr14 394; GFX9-O0-NEXT: v_mov_b32_e32 v10, s12 395; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec 396; GFX9-O0-NEXT: v_mov_b32_e32 v8, v10 397; GFX9-O0-NEXT: v_mov_b32_e32 v11, v8 398; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v6 399; GFX9-O0-NEXT: v_add_u32_e64 v6, v6, s13 400; GFX9-O0-NEXT: v_ffbh_u32_e64 v9, v9 401; GFX9-O0-NEXT: v_min_u32_e64 v12, v6, v9 402; GFX9-O0-NEXT: ; implicit-def: $sgpr14 403; GFX9-O0-NEXT: v_mov_b32_e32 v6, s12 404; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec 405; GFX9-O0-NEXT: v_mov_b32_e32 v13, v6 406; GFX9-O0-NEXT: s_mov_b64 s[14:15], 64 407; GFX9-O0-NEXT: v_mov_b32_e32 v9, v12 408; GFX9-O0-NEXT: s_mov_b32 s16, s14 409; GFX9-O0-NEXT: v_mov_b32_e32 v6, v13 410; GFX9-O0-NEXT: s_mov_b32 s18, s15 411; GFX9-O0-NEXT: v_add_co_u32_e64 v9, 
s[16:17], v9, s16 412; GFX9-O0-NEXT: v_mov_b32_e32 v10, s18 413; GFX9-O0-NEXT: v_addc_co_u32_e64 v6, s[16:17], v6, v10, s[16:17] 414; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec 415; GFX9-O0-NEXT: v_mov_b32_e32 v10, v6 416; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10 417; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v11, s[8:9] 418; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7 419; GFX9-O0-NEXT: v_mov_b32_e32 v7, v9 420; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v7, v8, s[8:9] 421; GFX9-O0-NEXT: ; implicit-def: $sgpr8 422; GFX9-O0-NEXT: ; implicit-def: $sgpr8 423; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 424; GFX9-O0-NEXT: v_mov_b32_e32 v9, v6 425; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] 426; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[4:5], s[8:9] 427; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v0 428; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s13 429; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v1 430; GFX9-O0-NEXT: v_min_u32_e64 v5, v4, v5 431; GFX9-O0-NEXT: ; implicit-def: $sgpr16 432; GFX9-O0-NEXT: v_mov_b32_e32 v4, s12 433; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec 434; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 435; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 436; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v2 437; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s13 438; GFX9-O0-NEXT: v_ffbh_u32_e64 v10, v3 439; GFX9-O0-NEXT: v_min_u32_e64 v11, v4, v10 440; GFX9-O0-NEXT: ; implicit-def: $sgpr13 441; GFX9-O0-NEXT: v_mov_b32_e32 v4, s12 442; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec 443; GFX9-O0-NEXT: v_mov_b32_e32 v12, v4 444; GFX9-O0-NEXT: v_mov_b32_e32 v10, v11 445; GFX9-O0-NEXT: s_mov_b32 s12, s14 446; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12 447; GFX9-O0-NEXT: s_mov_b32 s14, s15 448; GFX9-O0-NEXT: v_add_co_u32_e64 v10, s[12:13], v10, s12 449; GFX9-O0-NEXT: v_mov_b32_e32 v11, s14 450; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[12:13], v4, v11, s[12:13] 451; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def 
$vgpr10_vgpr11 killed $exec 452; GFX9-O0-NEXT: v_mov_b32_e32 v11, v4 453; GFX9-O0-NEXT: v_mov_b32_e32 v4, v11 454; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[8:9] 455; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 456; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 457; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[8:9] 458; GFX9-O0-NEXT: ; implicit-def: $sgpr8 459; GFX9-O0-NEXT: ; implicit-def: $sgpr8 460; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec 461; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 462; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 463; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 464; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec 465; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 466; GFX9-O0-NEXT: v_sub_co_u32_e32 v4, vcc, v4, v7 467; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v5, v6, vcc 468; GFX9-O0-NEXT: v_mov_b32_e32 v6, s10 469; GFX9-O0-NEXT: v_mov_b32_e32 v5, s10 470; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v6, vcc 471; GFX9-O0-NEXT: v_mov_b32_e32 v6, s11 472; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11 473; GFX9-O0-NEXT: v_subb_co_u32_e32 v6, vcc, v5, v6, vcc 474; GFX9-O0-NEXT: ; implicit-def: $sgpr8 475; GFX9-O0-NEXT: ; implicit-def: $sgpr8 476; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 477; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8 478; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill 479; GFX9-O0-NEXT: s_nop 0 480; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill 481; GFX9-O0-NEXT: ; implicit-def: $sgpr8 482; GFX9-O0-NEXT: ; implicit-def: $sgpr8 483; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec 484; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6 485; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill 486; GFX9-O0-NEXT: s_nop 0 487; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill 488; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7] 489; 
GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f 490; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13] 491; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15] 492; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7] 493; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15] 494; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9] 495; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6 496; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v6, 1 497; GFX9-O0-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9] 498; GFX9-O0-NEXT: s_mov_b64 s[4:5], -1 499; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5] 500; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 501; GFX9-O0-NEXT: s_mov_b32 s14, s13 502; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14 503; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13 504; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12 505; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 506; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6 507; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 508; GFX9-O0-NEXT: v_mov_b32_e32 v9, v8 509; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v9 510; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec 511; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 512; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 513; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 514; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6 515; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7] 516; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11 517; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[8:9] 518; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10 519; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[8:9] 520; GFX9-O0-NEXT: ; implicit-def: $sgpr12 521; GFX9-O0-NEXT: ; implicit-def: $sgpr12 522; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 523; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4 524; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11 525; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[8:9] 526; GFX9-O0-NEXT: v_mov_b32_e32 v3, s10 527; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, 
s[8:9] 528; GFX9-O0-NEXT: ; implicit-def: $sgpr8 529; GFX9-O0-NEXT: ; implicit-def: $sgpr8 530; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 531; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4 532; GFX9-O0-NEXT: s_and_b64 s[6:7], s[4:5], s[6:7] 533; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill 534; GFX9-O0-NEXT: s_nop 0 535; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill 536; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill 537; GFX9-O0-NEXT: s_nop 0 538; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill 539; GFX9-O0-NEXT: s_mov_b64 s[4:5], exec 540; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 4 541; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 5 542; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 543; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill 544; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] 545; GFX9-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] 546; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] 547; GFX9-O0-NEXT: s_cbranch_execz .LBB0_3 548; GFX9-O0-NEXT: s_branch .LBB0_8 549; GFX9-O0-NEXT: .LBB0_1: ; %Flow 550; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 551; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload 552; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] 553; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 554; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 6 555; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 7 556; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] 557; GFX9-O0-NEXT: ; %bb.2: ; %Flow 558; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload 559; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload 560; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload 561; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload 562; GFX9-O0-NEXT: buffer_load_dword v2, 
off, s[0:3], s32 offset:136 ; 4-byte Folded Reload 563; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload 564; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload 565; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload 566; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 567; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill 568; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 569; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill 570; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 571; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill 572; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 573; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill 574; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 575; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill 576; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 577; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill 578; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 579; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill 580; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 581; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill 582; GFX9-O0-NEXT: s_branch .LBB0_5 583; GFX9-O0-NEXT: .LBB0_3: ; %Flow2 584; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 585; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload 586; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] 587; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 588; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 4 589; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 5 590; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] 591; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload 592; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload 593; GFX9-O0-NEXT: 
buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload 594; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload 595; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 596; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill 597; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 598; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill 599; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill 600; GFX9-O0-NEXT: s_nop 0 601; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill 602; GFX9-O0-NEXT: s_branch .LBB0_9 603; GFX9-O0-NEXT: .LBB0_4: ; %udiv-loop-exit 604; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload 605; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload 606; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload 607; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload 608; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload 609; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload 610; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload 611; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload 612; GFX9-O0-NEXT: s_mov_b32 s4, 1 613; GFX9-O0-NEXT: s_waitcnt vmcnt(2) 614; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s4, v[0:1] 615; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 616; GFX9-O0-NEXT: v_lshlrev_b64 v[9:10], s4, v[9:10] 617; GFX9-O0-NEXT: s_mov_b32 s4, 63 618; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] 619; GFX9-O0-NEXT: v_mov_b32_e32 v11, v1 620; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10 621; GFX9-O0-NEXT: v_mov_b32_e32 v12, v8 622; GFX9-O0-NEXT: v_or3_b32 v4, v4, v11, v12 623; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 624; GFX9-O0-NEXT: v_mov_b32_e32 
v0, v9 625; GFX9-O0-NEXT: v_or3_b32 v0, v0, v1, v7 626; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 627; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4 628; GFX9-O0-NEXT: v_mov_b32_e32 v7, v3 629; GFX9-O0-NEXT: v_mov_b32_e32 v4, v6 630; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v7 631; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 632; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 633; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 634; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 635; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4 636; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill 637; GFX9-O0-NEXT: s_nop 0 638; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill 639; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill 640; GFX9-O0-NEXT: s_nop 0 641; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill 642; GFX9-O0-NEXT: s_branch .LBB0_3 643; GFX9-O0-NEXT: .LBB0_5: ; %Flow1 644; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 645; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload 646; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] 647; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 648; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 8 649; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 9 650; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] 651; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload 652; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload 653; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload 654; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload 655; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload 656; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload 657; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:88 
; 4-byte Folded Reload 658; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload 659; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 660; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill 661; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 662; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill 663; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill 664; GFX9-O0-NEXT: s_nop 0 665; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill 666; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill 667; GFX9-O0-NEXT: s_nop 0 668; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill 669; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill 670; GFX9-O0-NEXT: s_nop 0 671; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill 672; GFX9-O0-NEXT: s_branch .LBB0_4 673; GFX9-O0-NEXT: .LBB0_6: ; %udiv-do-while 674; GFX9-O0-NEXT: ; =>This Inner Loop Header: Depth=1 675; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 676; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload 677; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] 678; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 679; GFX9-O0-NEXT: v_readlane_b32 s6, v30, 10 680; GFX9-O0-NEXT: v_readlane_b32 s7, v30, 11 681; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload 682; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload 683; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload 684; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload 685; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload 686; GFX9-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:220 ; 4-byte Folded 
Reload 687; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload 688; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload 689; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload 690; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload 691; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload 692; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload 693; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload 694; GFX9-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload 695; GFX9-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload 696; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload 697; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload 698; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload 699; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload 700; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload 701; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload 702; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload 703; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload 704; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload 705; GFX9-O0-NEXT: s_mov_b32 s4, 63 706; GFX9-O0-NEXT: s_waitcnt vmcnt(16) 707; GFX9-O0-NEXT: v_lshrrev_b64 v[28:29], s4, v[2:3] 708; GFX9-O0-NEXT: v_mov_b32_e32 v5, v29 709; GFX9-O0-NEXT: s_mov_b32 s5, 1 710; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], s5, v[22:23] 711; GFX9-O0-NEXT: v_mov_b32_e32 v4, v23 712; GFX9-O0-NEXT: 
v_or_b32_e64 v4, v4, v5 713; GFX9-O0-NEXT: v_mov_b32_e32 v10, v28 714; GFX9-O0-NEXT: v_mov_b32_e32 v5, v22 715; GFX9-O0-NEXT: v_or_b32_e64 v22, v5, v10 716; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec 717; GFX9-O0-NEXT: v_mov_b32_e32 v23, v4 718; GFX9-O0-NEXT: v_lshlrev_b64 v[28:29], s5, v[2:3] 719; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], s4, v[6:7] 720; GFX9-O0-NEXT: v_mov_b32_e32 v2, v29 721; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5 722; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 723; GFX9-O0-NEXT: v_mov_b32_e32 v3, v28 724; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec 725; GFX9-O0-NEXT: v_or_b32_e64 v4, v3, v4 726; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 727; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2 728; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s5, v[0:1] 729; GFX9-O0-NEXT: v_lshlrev_b64 v[28:29], s5, v[6:7] 730; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] 731; GFX9-O0-NEXT: v_mov_b32_e32 v7, v1 732; GFX9-O0-NEXT: v_mov_b32_e32 v6, v29 733; GFX9-O0-NEXT: s_waitcnt vmcnt(10) 734; GFX9-O0-NEXT: v_mov_b32_e32 v10, v27 735; GFX9-O0-NEXT: v_or3_b32 v6, v6, v7, v10 736; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 737; GFX9-O0-NEXT: v_mov_b32_e32 v0, v28 738; GFX9-O0-NEXT: v_mov_b32_e32 v7, v26 739; GFX9-O0-NEXT: v_or3_b32 v0, v0, v1, v7 740; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 741; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6 742; GFX9-O0-NEXT: v_mov_b32_e32 v7, v3 743; GFX9-O0-NEXT: s_waitcnt vmcnt(8) 744; GFX9-O0-NEXT: v_mov_b32_e32 v6, v25 745; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7 746; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 747; GFX9-O0-NEXT: v_mov_b32_e32 v2, v24 748; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 749; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 750; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6 751; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 752; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5 753; GFX9-O0-NEXT: v_mov_b32_e32 v4, v22 754; 
GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 755; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 756; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11 757; GFX9-O0-NEXT: v_mov_b32_e32 v11, v14 758; GFX9-O0-NEXT: v_mov_b32_e32 v7, v15 759; GFX9-O0-NEXT: v_sub_co_u32_e32 v13, vcc, v13, v6 760; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 761; GFX9-O0-NEXT: v_subb_co_u32_e32 v12, vcc, v12, v10, vcc 762; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v11, v4, vcc 763; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v5, vcc 764; GFX9-O0-NEXT: ; implicit-def: $sgpr5 765; GFX9-O0-NEXT: ; implicit-def: $sgpr5 766; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec 767; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7 768; GFX9-O0-NEXT: v_ashrrev_i64 v[13:14], s4, v[11:12] 769; GFX9-O0-NEXT: v_mov_b32_e32 v7, v14 770; GFX9-O0-NEXT: s_mov_b64 s[4:5], 1 771; GFX9-O0-NEXT: s_mov_b32 s8, s5 772; GFX9-O0-NEXT: v_and_b32_e64 v12, v7, s8 773; GFX9-O0-NEXT: v_mov_b32_e32 v11, v13 774; GFX9-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 775; GFX9-O0-NEXT: v_and_b32_e64 v14, v11, s4 776; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec 777; GFX9-O0-NEXT: v_mov_b32_e32 v15, v12 778; GFX9-O0-NEXT: v_mov_b32_e32 v12, 0 779; GFX9-O0-NEXT: v_mov_b32_e32 v13, 0 780; GFX9-O0-NEXT: v_mov_b32_e32 v22, v21 781; GFX9-O0-NEXT: v_and_b32_e64 v22, v7, v22 782; GFX9-O0-NEXT: v_and_b32_e64 v20, v11, v20 783; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec 784; GFX9-O0-NEXT: v_mov_b32_e32 v21, v22 785; GFX9-O0-NEXT: v_mov_b32_e32 v22, v19 786; GFX9-O0-NEXT: v_and_b32_e64 v7, v7, v22 787; GFX9-O0-NEXT: v_and_b32_e64 v22, v11, v18 788; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec 789; GFX9-O0-NEXT: v_mov_b32_e32 v23, v7 790; GFX9-O0-NEXT: v_mov_b32_e32 v19, v22 791; GFX9-O0-NEXT: v_mov_b32_e32 v18, v23 792; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20 793; GFX9-O0-NEXT: v_mov_b32_e32 v7, v21 794; GFX9-O0-NEXT: v_sub_co_u32_e32 v6, 
vcc, v6, v19 795; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v18, vcc 796; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v4, v11, vcc 797; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v7, vcc 798; GFX9-O0-NEXT: ; implicit-def: $sgpr4 799; GFX9-O0-NEXT: ; implicit-def: $sgpr4 800; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 801; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 802; GFX9-O0-NEXT: ; implicit-def: $sgpr4 803; GFX9-O0-NEXT: ; implicit-def: $sgpr4 804; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec 805; GFX9-O0-NEXT: v_mov_b32_e32 v7, v10 806; GFX9-O0-NEXT: v_mov_b32_e32 v11, v8 807; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 killed $vgpr8_vgpr9 killed $exec 808; GFX9-O0-NEXT: s_mov_b64 s[8:9], -1 809; GFX9-O0-NEXT: s_mov_b32 s5, s8 810; GFX9-O0-NEXT: s_mov_b32 s4, s9 811; GFX9-O0-NEXT: v_mov_b32_e32 v10, v16 812; GFX9-O0-NEXT: v_mov_b32_e32 v8, v17 813; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5 814; GFX9-O0-NEXT: v_add_co_u32_e32 v19, vcc, v11, v16 815; GFX9-O0-NEXT: v_mov_b32_e32 v11, s4 816; GFX9-O0-NEXT: v_addc_co_u32_e32 v9, vcc, v9, v11, vcc 817; GFX9-O0-NEXT: v_mov_b32_e32 v11, s5 818; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v10, v11, vcc 819; GFX9-O0-NEXT: v_mov_b32_e32 v10, s4 820; GFX9-O0-NEXT: v_addc_co_u32_e32 v8, vcc, v8, v10, vcc 821; GFX9-O0-NEXT: ; implicit-def: $sgpr4 822; GFX9-O0-NEXT: ; implicit-def: $sgpr4 823; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec 824; GFX9-O0-NEXT: v_mov_b32_e32 v20, v9 825; GFX9-O0-NEXT: ; implicit-def: $sgpr4 826; GFX9-O0-NEXT: ; implicit-def: $sgpr4 827; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec 828; GFX9-O0-NEXT: v_mov_b32_e32 v17, v8 829; GFX9-O0-NEXT: v_mov_b32_e32 v8, v16 830; GFX9-O0-NEXT: v_mov_b32_e32 v9, v17 831; GFX9-O0-NEXT: v_mov_b32_e32 v10, v19 832; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20 833; GFX9-O0-NEXT: v_mov_b32_e32 v21, v17 834; GFX9-O0-NEXT: v_mov_b32_e32 v18, v20 835; 
GFX9-O0-NEXT: v_or_b32_e64 v18, v18, v21 836; GFX9-O0-NEXT: v_mov_b32_e32 v17, v16 837; GFX9-O0-NEXT: v_mov_b32_e32 v16, v19 838; GFX9-O0-NEXT: v_or_b32_e64 v16, v16, v17 839; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec 840; GFX9-O0-NEXT: v_mov_b32_e32 v17, v18 841; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[16:17], v[12:13] 842; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] 843; GFX9-O0-NEXT: v_mov_b32_e32 v17, v3 844; GFX9-O0-NEXT: v_mov_b32_e32 v16, v2 845; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill 846; GFX9-O0-NEXT: s_nop 0 847; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill 848; GFX9-O0-NEXT: v_mov_b32_e32 v17, v1 849; GFX9-O0-NEXT: v_mov_b32_e32 v16, v0 850; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill 851; GFX9-O0-NEXT: s_nop 0 852; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill 853; GFX9-O0-NEXT: v_mov_b32_e32 v17, v15 854; GFX9-O0-NEXT: v_mov_b32_e32 v16, v14 855; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill 856; GFX9-O0-NEXT: s_nop 0 857; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill 858; GFX9-O0-NEXT: v_mov_b32_e32 v17, v13 859; GFX9-O0-NEXT: v_mov_b32_e32 v16, v12 860; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill 861; GFX9-O0-NEXT: s_nop 0 862; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill 863; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5] 864; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 6 865; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 7 866; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5] 867; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 10 868; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 11 869; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 870; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded 
Spill 871; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] 872; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill 873; GFX9-O0-NEXT: s_nop 0 874; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill 875; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill 876; GFX9-O0-NEXT: s_nop 0 877; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill 878; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill 879; GFX9-O0-NEXT: s_nop 0 880; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill 881; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill 882; GFX9-O0-NEXT: s_nop 0 883; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill 884; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill 885; GFX9-O0-NEXT: s_nop 0 886; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill 887; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill 888; GFX9-O0-NEXT: s_nop 0 889; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill 890; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill 891; GFX9-O0-NEXT: s_nop 0 892; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill 893; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill 894; GFX9-O0-NEXT: s_nop 0 895; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill 896; GFX9-O0-NEXT: s_andn2_b64 exec, exec, s[4:5] 897; GFX9-O0-NEXT: s_cbranch_execnz .LBB0_6 898; GFX9-O0-NEXT: s_branch .LBB0_1 899; GFX9-O0-NEXT: .LBB0_7: ; %udiv-preheader 900; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 901; GFX9-O0-NEXT: 
buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload 902; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] 903; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload 904; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload 905; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload 906; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload 907; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload 908; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload 909; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload 910; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload 911; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload 912; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload 913; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload 914; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload 915; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload 916; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload 917; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload 918; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload 919; GFX9-O0-NEXT: s_waitcnt vmcnt(9) 920; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10 921; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 922; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], v4, v[20:21] 923; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 924; GFX9-O0-NEXT: s_mov_b32 s6, 64 925; GFX9-O0-NEXT: v_sub_u32_e64 v12, s6, v4 926; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], v12, v[18:19] 927; GFX9-O0-NEXT: v_mov_b32_e32 v12, v23 928; GFX9-O0-NEXT: 
v_or_b32_e64 v5, v5, v12 929; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec 930; GFX9-O0-NEXT: v_mov_b32_e32 v7, v22 931; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7 932; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec 933; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 934; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7 935; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v4, s6 936; GFX9-O0-NEXT: v_sub_u32_e64 v5, v4, s6 937; GFX9-O0-NEXT: v_lshrrev_b64 v[22:23], v5, v[18:19] 938; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 939; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[4:5] 940; GFX9-O0-NEXT: s_mov_b32 s6, 0 941; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v4, s6 942; GFX9-O0-NEXT: v_mov_b32_e32 v12, v21 943; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[6:7] 944; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 945; GFX9-O0-NEXT: v_mov_b32_e32 v6, v22 946; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5] 947; GFX9-O0-NEXT: v_mov_b32_e32 v7, v20 948; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[6:7] 949; GFX9-O0-NEXT: ; implicit-def: $sgpr6 950; GFX9-O0-NEXT: ; implicit-def: $sgpr6 951; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec 952; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 953; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], v4, v[18:19] 954; GFX9-O0-NEXT: v_mov_b32_e32 v15, v5 955; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 956; GFX9-O0-NEXT: s_mov_b32 s8, s7 957; GFX9-O0-NEXT: v_mov_b32_e32 v12, s8 958; GFX9-O0-NEXT: v_cndmask_b32_e64 v12, v12, v15, s[4:5] 959; GFX9-O0-NEXT: v_mov_b32_e32 v5, v4 960; GFX9-O0-NEXT: s_mov_b32 s8, s6 961; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 962; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5] 963; GFX9-O0-NEXT: ; implicit-def: $sgpr4 964; GFX9-O0-NEXT: ; implicit-def: $sgpr4 965; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 966; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12 967; GFX9-O0-NEXT: v_mov_b32_e32 v12, v13 968; GFX9-O0-NEXT: v_mov_b32_e32 v15, v14 969; GFX9-O0-NEXT: s_mov_b64 
s[8:9], -1 970; GFX9-O0-NEXT: s_mov_b32 s5, s8 971; GFX9-O0-NEXT: s_mov_b32 s4, s9 972; GFX9-O0-NEXT: v_mov_b32_e32 v14, v16 973; GFX9-O0-NEXT: v_mov_b32_e32 v13, v17 974; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5 975; GFX9-O0-NEXT: v_add_co_u32_e32 v12, vcc, v12, v16 976; GFX9-O0-NEXT: v_mov_b32_e32 v16, s4 977; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v15, v16, vcc 978; GFX9-O0-NEXT: v_mov_b32_e32 v15, s5 979; GFX9-O0-NEXT: v_addc_co_u32_e32 v14, vcc, v14, v15, vcc 980; GFX9-O0-NEXT: v_mov_b32_e32 v15, s4 981; GFX9-O0-NEXT: v_addc_co_u32_e32 v13, vcc, v13, v15, vcc 982; GFX9-O0-NEXT: ; implicit-def: $sgpr4 983; GFX9-O0-NEXT: ; implicit-def: $sgpr4 984; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec 985; GFX9-O0-NEXT: v_mov_b32_e32 v15, v13 986; GFX9-O0-NEXT: ; implicit-def: $sgpr4 987; GFX9-O0-NEXT: ; implicit-def: $sgpr4 988; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec 989; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16 990; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] 991; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill 992; GFX9-O0-NEXT: s_nop 0 993; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill 994; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill 995; GFX9-O0-NEXT: s_nop 0 996; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill 997; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[6:7] 998; GFX9-O0-NEXT: v_mov_b32_e32 v15, s9 999; GFX9-O0-NEXT: v_mov_b32_e32 v14, s8 1000; GFX9-O0-NEXT: v_mov_b32_e32 v13, s7 1001; GFX9-O0-NEXT: v_mov_b32_e32 v12, s6 1002; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 10 1003; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 11 1004; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 1005; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill 1006; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] 1007; GFX9-O0-NEXT: buffer_store_dword v14, off, 
s[0:3], s32 offset:256 ; 4-byte Folded Spill 1008; GFX9-O0-NEXT: s_nop 0 1009; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill 1010; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill 1011; GFX9-O0-NEXT: s_nop 0 1012; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill 1013; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill 1014; GFX9-O0-NEXT: s_nop 0 1015; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill 1016; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill 1017; GFX9-O0-NEXT: s_nop 0 1018; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill 1019; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill 1020; GFX9-O0-NEXT: s_nop 0 1021; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill 1022; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill 1023; GFX9-O0-NEXT: s_nop 0 1024; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill 1025; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill 1026; GFX9-O0-NEXT: s_nop 0 1027; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill 1028; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill 1029; GFX9-O0-NEXT: s_nop 0 1030; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill 1031; GFX9-O0-NEXT: s_branch .LBB0_6 1032; GFX9-O0-NEXT: .LBB0_8: ; %udiv-bb1 1033; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 1034; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload 1035; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] 1036; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 
4-byte Folded Reload 1037; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload 1038; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload 1039; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload 1040; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload 1041; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload 1042; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload 1043; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload 1044; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1 1045; GFX9-O0-NEXT: s_mov_b32 s5, s6 1046; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 1047; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 1048; GFX9-O0-NEXT: s_mov_b32 s4, s7 1049; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 1050; GFX9-O0-NEXT: s_mov_b32 s8, s6 1051; GFX9-O0-NEXT: s_mov_b32 s9, s7 1052; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4 1053; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 1054; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5 1055; GFX9-O0-NEXT: v_add_co_u32_e32 v8, vcc, v3, v4 1056; GFX9-O0-NEXT: v_mov_b32_e32 v4, s4 1057; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 1058; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v4, vcc 1059; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 1060; GFX9-O0-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v4, vcc 1061; GFX9-O0-NEXT: v_mov_b32_e32 v4, s9 1062; GFX9-O0-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v4, vcc 1063; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1064; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1065; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 1066; GFX9-O0-NEXT: v_mov_b32_e32 v9, v1 1067; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1068; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1069; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 1070; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 1071; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1 1072; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0 1073; 
GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill 1074; GFX9-O0-NEXT: s_nop 0 1075; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill 1076; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 1077; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 1078; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill 1079; GFX9-O0-NEXT: s_nop 0 1080; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill 1081; GFX9-O0-NEXT: s_mov_b32 s4, 0x7f 1082; GFX9-O0-NEXT: v_sub_u32_e64 v2, s4, v3 1083; GFX9-O0-NEXT: v_lshlrev_b64 v[4:5], v2, v[10:11] 1084; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5 1085; GFX9-O0-NEXT: s_mov_b32 s4, 64 1086; GFX9-O0-NEXT: v_sub_u32_e64 v13, s4, v2 1087; GFX9-O0-NEXT: v_lshrrev_b64 v[13:14], v13, v[6:7] 1088; GFX9-O0-NEXT: v_mov_b32_e32 v15, v14 1089; GFX9-O0-NEXT: v_or_b32_e64 v12, v12, v15 1090; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec 1091; GFX9-O0-NEXT: v_mov_b32_e32 v5, v13 1092; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 1093; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 1094; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12 1095; GFX9-O0-NEXT: v_mov_b32_e32 v14, v5 1096; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v2, s4 1097; GFX9-O0-NEXT: s_mov_b32 s10, 63 1098; GFX9-O0-NEXT: v_sub_u32_e64 v3, s10, v3 1099; GFX9-O0-NEXT: v_lshlrev_b64 v[12:13], v3, v[6:7] 1100; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13 1101; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[4:5] 1102; GFX9-O0-NEXT: s_mov_b32 s10, 0 1103; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[10:11], v2, s10 1104; GFX9-O0-NEXT: v_mov_b32_e32 v14, v11 1105; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[10:11] 1106; GFX9-O0-NEXT: v_mov_b32_e32 v5, v4 1107; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12 1108; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5] 1109; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 1110; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[10:11] 1111; GFX9-O0-NEXT: 
; implicit-def: $sgpr10 1112; GFX9-O0-NEXT: ; implicit-def: $sgpr10 1113; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 1114; GFX9-O0-NEXT: v_mov_b32_e32 v5, v3 1115; GFX9-O0-NEXT: v_lshlrev_b64 v[6:7], v2, v[6:7] 1116; GFX9-O0-NEXT: v_mov_b32_e32 v3, v7 1117; GFX9-O0-NEXT: v_mov_b32_e32 v2, s9 1118; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5] 1119; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec 1120; GFX9-O0-NEXT: v_mov_b32_e32 v3, s8 1121; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v3, v6, s[4:5] 1122; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1123; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1124; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec 1125; GFX9-O0-NEXT: v_mov_b32_e32 v7, v2 1126; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill 1127; GFX9-O0-NEXT: s_nop 0 1128; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill 1129; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill 1130; GFX9-O0-NEXT: s_nop 0 1131; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill 1132; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 1133; GFX9-O0-NEXT: v_mov_b32_e32 v2, v9 1134; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 1135; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 1136; GFX9-O0-NEXT: v_mov_b32_e32 v0, v8 1137; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v1 1138; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 1139; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 1140; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[0:1], s[6:7] 1141; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] 1142; GFX9-O0-NEXT: v_mov_b32_e32 v2, s8 1143; GFX9-O0-NEXT: v_mov_b32_e32 v3, s9 1144; GFX9-O0-NEXT: v_mov_b32_e32 v0, s6 1145; GFX9-O0-NEXT: v_mov_b32_e32 v1, s7 1146; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill 1147; GFX9-O0-NEXT: s_nop 0 1148; GFX9-O0-NEXT: 
buffer_store_dword v7, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill 1149; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill 1150; GFX9-O0-NEXT: s_nop 0 1151; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill 1152; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill 1153; GFX9-O0-NEXT: s_nop 0 1154; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill 1155; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill 1156; GFX9-O0-NEXT: s_nop 0 1157; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill 1158; GFX9-O0-NEXT: s_mov_b64 s[6:7], exec 1159; GFX9-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] 1160; GFX9-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] 1161; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 8 1162; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 9 1163; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 1164; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill 1165; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] 1166; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] 1167; GFX9-O0-NEXT: s_cbranch_execz .LBB0_5 1168; GFX9-O0-NEXT: s_branch .LBB0_7 1169; GFX9-O0-NEXT: .LBB0_9: ; %udiv-end 1170; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload 1171; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload 1172; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload 1173; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload 1174; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload 1175; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload 1176; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload 1177; GFX9-O0-NEXT: buffer_load_dword v8, off, 
s[0:3], s32 offset:80 ; 4-byte Folded Reload 1178; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 1179; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8 1180; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6 1181; GFX9-O0-NEXT: v_xor_b32_e64 v3, v3, v2 1182; GFX9-O0-NEXT: v_mov_b32_e32 v4, v7 1183; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec 1184; GFX9-O0-NEXT: v_xor_b32_e64 v8, v5, v4 1185; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 1186; GFX9-O0-NEXT: v_mov_b32_e32 v9, v3 1187; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11 1188; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 1189; GFX9-O0-NEXT: v_xor_b32_e64 v3, v3, v6 1190; GFX9-O0-NEXT: v_mov_b32_e32 v7, v10 1191; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 1192; GFX9-O0-NEXT: v_xor_b32_e64 v0, v0, v7 1193; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 1194; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 1195; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0 1196; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 1197; GFX9-O0-NEXT: v_mov_b32_e32 v3, v8 1198; GFX9-O0-NEXT: v_mov_b32_e32 v1, v9 1199; GFX9-O0-NEXT: v_sub_co_u32_e32 v5, vcc, v5, v7 1200; GFX9-O0-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v6, vcc 1201; GFX9-O0-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v4, vcc 1202; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc 1203; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1204; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1205; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec 1206; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 1207; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1208; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1209; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec 1210; GFX9-O0-NEXT: v_mov_b32_e32 v6, v0 1211; GFX9-O0-NEXT: v_mov_b32_e32 v0, v5 1212; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 1213; GFX9-O0-NEXT: s_mov_b32 s4, 32 1214; GFX9-O0-NEXT: v_lshrrev_b64 v[5:6], s4, v[5:6] 1215; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5 1216; GFX9-O0-NEXT: v_lshrrev_b64 
v[3:4], s4, v[3:4] 1217; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec 1218; GFX9-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 1219; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload 1220; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] 1221; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 1222; GFX9-O0-NEXT: s_setpc_b64 s[30:31] 1223; 1224; GFX9-G-LABEL: v_sdiv_i128_vv: 1225; GFX9-G: ; %bb.0: ; %_udiv-special-cases 1226; GFX9-G-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1227; GFX9-G-NEXT: v_ashrrev_i32_e32 v16, 31, v3 1228; GFX9-G-NEXT: v_xor_b32_e32 v0, v16, v0 1229; GFX9-G-NEXT: v_xor_b32_e32 v1, v16, v1 1230; GFX9-G-NEXT: v_sub_co_u32_e32 v8, vcc, v0, v16 1231; GFX9-G-NEXT: v_xor_b32_e32 v2, v16, v2 1232; GFX9-G-NEXT: v_subb_co_u32_e32 v9, vcc, v1, v16, vcc 1233; GFX9-G-NEXT: v_ashrrev_i32_e32 v17, 31, v7 1234; GFX9-G-NEXT: v_xor_b32_e32 v3, v16, v3 1235; GFX9-G-NEXT: v_subb_co_u32_e32 v10, vcc, v2, v16, vcc 1236; GFX9-G-NEXT: v_subb_co_u32_e32 v11, vcc, v3, v16, vcc 1237; GFX9-G-NEXT: v_xor_b32_e32 v0, v17, v4 1238; GFX9-G-NEXT: v_xor_b32_e32 v1, v17, v5 1239; GFX9-G-NEXT: v_sub_co_u32_e32 v18, vcc, v0, v17 1240; GFX9-G-NEXT: v_xor_b32_e32 v2, v17, v6 1241; GFX9-G-NEXT: v_subb_co_u32_e32 v19, vcc, v1, v17, vcc 1242; GFX9-G-NEXT: v_xor_b32_e32 v3, v17, v7 1243; GFX9-G-NEXT: v_subb_co_u32_e32 v4, vcc, v2, v17, vcc 1244; GFX9-G-NEXT: v_subb_co_u32_e32 v5, vcc, v3, v17, vcc 1245; GFX9-G-NEXT: v_or_b32_e32 v0, v18, v4 1246; GFX9-G-NEXT: v_or_b32_e32 v1, v19, v5 1247; GFX9-G-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] 1248; GFX9-G-NEXT: v_or_b32_e32 v0, v8, v10 1249; GFX9-G-NEXT: v_or_b32_e32 v1, v9, v11 1250; GFX9-G-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] 1251; GFX9-G-NEXT: v_ffbh_u32_e32 v1, v18 1252; GFX9-G-NEXT: v_ffbh_u32_e32 v0, v19 1253; GFX9-G-NEXT: v_add_u32_e32 v1, 32, v1 1254; GFX9-G-NEXT: v_ffbh_u32_e32 v2, v4 1255; GFX9-G-NEXT: v_min_u32_e32 v0, v0, v1 1256; GFX9-G-NEXT: v_ffbh_u32_e32 v1, v5 1257; GFX9-G-NEXT: 
v_add_u32_e32 v2, 32, v2 1258; GFX9-G-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[4:5] 1259; GFX9-G-NEXT: v_add_u32_e32 v0, 64, v0 1260; GFX9-G-NEXT: v_min_u32_e32 v1, v1, v2 1261; GFX9-G-NEXT: v_ffbh_u32_e32 v2, v8 1262; GFX9-G-NEXT: v_cndmask_b32_e64 v0, v1, v0, s[6:7] 1263; GFX9-G-NEXT: v_ffbh_u32_e32 v1, v9 1264; GFX9-G-NEXT: v_add_u32_e32 v2, 32, v2 1265; GFX9-G-NEXT: v_ffbh_u32_e32 v3, v10 1266; GFX9-G-NEXT: v_min_u32_e32 v1, v1, v2 1267; GFX9-G-NEXT: v_ffbh_u32_e32 v2, v11 1268; GFX9-G-NEXT: v_add_u32_e32 v3, 32, v3 1269; GFX9-G-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[10:11] 1270; GFX9-G-NEXT: v_add_u32_e32 v1, 64, v1 1271; GFX9-G-NEXT: v_min_u32_e32 v2, v2, v3 1272; GFX9-G-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[6:7] 1273; GFX9-G-NEXT: v_sub_co_u32_e64 v0, s[6:7], v0, v1 1274; GFX9-G-NEXT: v_subb_co_u32_e64 v1, s[6:7], 0, 0, s[6:7] 1275; GFX9-G-NEXT: v_mov_b32_e32 v6, 0x7f 1276; GFX9-G-NEXT: v_subb_co_u32_e64 v2, s[6:7], 0, 0, s[6:7] 1277; GFX9-G-NEXT: v_mov_b32_e32 v7, 0 1278; GFX9-G-NEXT: v_subb_co_u32_e64 v3, s[6:7], 0, 0, s[6:7] 1279; GFX9-G-NEXT: v_cmp_gt_u64_e64 s[6:7], v[0:1], v[6:7] 1280; GFX9-G-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1281; GFX9-G-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[6:7] 1282; GFX9-G-NEXT: v_cmp_lt_u64_e64 s[6:7], 0, v[2:3] 1283; GFX9-G-NEXT: v_or_b32_e32 v15, v1, v3 1284; GFX9-G-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[6:7] 1285; GFX9-G-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[2:3] 1286; GFX9-G-NEXT: s_mov_b64 s[8:9], 0 1287; GFX9-G-NEXT: v_cndmask_b32_e64 v6, v7, v6, s[6:7] 1288; GFX9-G-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] 1289; GFX9-G-NEXT: v_or_b32_e32 v20, v7, v6 1290; GFX9-G-NEXT: v_xor_b32_e32 v6, 0x7f, v0 1291; GFX9-G-NEXT: v_or_b32_e32 v14, v6, v2 1292; GFX9-G-NEXT: v_and_b32_e32 v6, 1, v20 1293; GFX9-G-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 1294; GFX9-G-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc 1295; GFX9-G-NEXT: v_cndmask_b32_e64 v7, v9, 0, vcc 1296; GFX9-G-NEXT: v_cndmask_b32_e64 v12, v10, 0, vcc 1297; GFX9-G-NEXT: v_cndmask_b32_e64 v13, v11, 0, vcc 
1298; GFX9-G-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] 1299; GFX9-G-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 1300; GFX9-G-NEXT: v_or_b32_e32 v14, v20, v14 1301; GFX9-G-NEXT: v_and_b32_e32 v14, 1, v14 1302; GFX9-G-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 1303; GFX9-G-NEXT: s_xor_b64 s[4:5], vcc, -1 1304; GFX9-G-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] 1305; GFX9-G-NEXT: s_cbranch_execz .LBB0_6 1306; GFX9-G-NEXT: ; %bb.1: ; %udiv-bb1 1307; GFX9-G-NEXT: v_add_co_u32_e32 v20, vcc, 1, v0 1308; GFX9-G-NEXT: v_addc_co_u32_e32 v21, vcc, 0, v1, vcc 1309; GFX9-G-NEXT: v_addc_co_u32_e32 v22, vcc, 0, v2, vcc 1310; GFX9-G-NEXT: v_addc_co_u32_e32 v23, vcc, 0, v3, vcc 1311; GFX9-G-NEXT: s_xor_b64 s[4:5], vcc, -1 1312; GFX9-G-NEXT: v_sub_co_u32_e32 v12, vcc, 0x7f, v0 1313; GFX9-G-NEXT: v_sub_u32_e32 v0, 64, v12 1314; GFX9-G-NEXT: v_lshrrev_b64 v[0:1], v0, v[8:9] 1315; GFX9-G-NEXT: v_lshlrev_b64 v[2:3], v12, v[10:11] 1316; GFX9-G-NEXT: v_add_u32_e32 v13, 0xffffffc0, v12 1317; GFX9-G-NEXT: v_lshlrev_b64 v[6:7], v12, v[8:9] 1318; GFX9-G-NEXT: v_or_b32_e32 v2, v0, v2 1319; GFX9-G-NEXT: v_or_b32_e32 v3, v1, v3 1320; GFX9-G-NEXT: v_lshlrev_b64 v[0:1], v13, v[8:9] 1321; GFX9-G-NEXT: v_cmp_gt_u32_e32 vcc, 64, v12 1322; GFX9-G-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc 1323; GFX9-G-NEXT: v_cndmask_b32_e32 v7, 0, v7, vcc 1324; GFX9-G-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1325; GFX9-G-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1326; GFX9-G-NEXT: v_cmp_eq_u32_e32 vcc, 0, v12 1327; GFX9-G-NEXT: v_cndmask_b32_e32 v12, v0, v10, vcc 1328; GFX9-G-NEXT: v_cndmask_b32_e32 v13, v1, v11, vcc 1329; GFX9-G-NEXT: s_mov_b64 s[10:11], s[8:9] 1330; GFX9-G-NEXT: v_mov_b32_e32 v0, s8 1331; GFX9-G-NEXT: v_mov_b32_e32 v1, s9 1332; GFX9-G-NEXT: v_mov_b32_e32 v2, s10 1333; GFX9-G-NEXT: v_mov_b32_e32 v3, s11 1334; GFX9-G-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] 1335; GFX9-G-NEXT: s_xor_b64 s[12:13], exec, s[8:9] 1336; GFX9-G-NEXT: s_cbranch_execz .LBB0_5 1337; GFX9-G-NEXT: ; %bb.2: ; %udiv-preheader 1338; GFX9-G-NEXT: 
v_sub_u32_e32 v2, 64, v20 1339; GFX9-G-NEXT: v_lshrrev_b64 v[0:1], v20, v[8:9] 1340; GFX9-G-NEXT: v_lshlrev_b64 v[2:3], v2, v[10:11] 1341; GFX9-G-NEXT: v_add_u32_e32 v24, 0xffffffc0, v20 1342; GFX9-G-NEXT: v_lshrrev_b64 v[14:15], v20, v[10:11] 1343; GFX9-G-NEXT: v_or_b32_e32 v2, v0, v2 1344; GFX9-G-NEXT: v_or_b32_e32 v3, v1, v3 1345; GFX9-G-NEXT: v_lshrrev_b64 v[0:1], v24, v[10:11] 1346; GFX9-G-NEXT: v_cmp_gt_u32_e32 vcc, 64, v20 1347; GFX9-G-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1348; GFX9-G-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1349; GFX9-G-NEXT: v_cndmask_b32_e32 v14, 0, v14, vcc 1350; GFX9-G-NEXT: v_cndmask_b32_e32 v15, 0, v15, vcc 1351; GFX9-G-NEXT: v_add_co_u32_e32 v24, vcc, -1, v18 1352; GFX9-G-NEXT: s_mov_b64 s[8:9], 0 1353; GFX9-G-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v20 1354; GFX9-G-NEXT: v_addc_co_u32_e32 v25, vcc, -1, v19, vcc 1355; GFX9-G-NEXT: v_cndmask_b32_e64 v10, v0, v8, s[4:5] 1356; GFX9-G-NEXT: v_cndmask_b32_e64 v11, v1, v9, s[4:5] 1357; GFX9-G-NEXT: v_addc_co_u32_e32 v26, vcc, -1, v4, vcc 1358; GFX9-G-NEXT: s_mov_b64 s[10:11], s[8:9] 1359; GFX9-G-NEXT: v_mov_b32_e32 v0, s8 1360; GFX9-G-NEXT: v_addc_co_u32_e32 v27, vcc, -1, v5, vcc 1361; GFX9-G-NEXT: v_mov_b32_e32 v9, 0 1362; GFX9-G-NEXT: v_mov_b32_e32 v1, s9 1363; GFX9-G-NEXT: v_mov_b32_e32 v2, s10 1364; GFX9-G-NEXT: v_mov_b32_e32 v3, s11 1365; GFX9-G-NEXT: .LBB0_3: ; %udiv-do-while 1366; GFX9-G-NEXT: ; =>This Inner Loop Header: Depth=1 1367; GFX9-G-NEXT: v_lshlrev_b64 v[2:3], 1, v[6:7] 1368; GFX9-G-NEXT: v_lshrrev_b32_e32 v8, 31, v7 1369; GFX9-G-NEXT: v_or_b32_e32 v6, v0, v2 1370; GFX9-G-NEXT: v_or_b32_e32 v7, v1, v3 1371; GFX9-G-NEXT: v_lshlrev_b64 v[2:3], 1, v[10:11] 1372; GFX9-G-NEXT: v_lshrrev_b32_e32 v10, 31, v13 1373; GFX9-G-NEXT: v_lshlrev_b64 v[0:1], 1, v[14:15] 1374; GFX9-G-NEXT: v_or_b32_e32 v2, v2, v10 1375; GFX9-G-NEXT: v_lshrrev_b32_e32 v14, 31, v11 1376; GFX9-G-NEXT: v_sub_co_u32_e32 v10, vcc, v24, v2 1377; GFX9-G-NEXT: v_or_b32_e32 v0, v0, v14 1378; GFX9-G-NEXT: 
v_subb_co_u32_e32 v10, vcc, v25, v3, vcc 1379; GFX9-G-NEXT: v_subb_co_u32_e32 v10, vcc, v26, v0, vcc 1380; GFX9-G-NEXT: v_subb_co_u32_e32 v10, vcc, v27, v1, vcc 1381; GFX9-G-NEXT: v_ashrrev_i32_e32 v28, 31, v10 1382; GFX9-G-NEXT: v_and_b32_e32 v10, v28, v18 1383; GFX9-G-NEXT: v_and_b32_e32 v11, v28, v19 1384; GFX9-G-NEXT: v_sub_co_u32_e32 v10, vcc, v2, v10 1385; GFX9-G-NEXT: v_subb_co_u32_e32 v11, vcc, v3, v11, vcc 1386; GFX9-G-NEXT: v_and_b32_e32 v2, v28, v4 1387; GFX9-G-NEXT: v_and_b32_e32 v3, v28, v5 1388; GFX9-G-NEXT: v_subb_co_u32_e32 v14, vcc, v0, v2, vcc 1389; GFX9-G-NEXT: v_subb_co_u32_e32 v15, vcc, v1, v3, vcc 1390; GFX9-G-NEXT: v_add_co_u32_e32 v20, vcc, -1, v20 1391; GFX9-G-NEXT: v_addc_co_u32_e32 v21, vcc, -1, v21, vcc 1392; GFX9-G-NEXT: v_addc_co_u32_e32 v22, vcc, -1, v22, vcc 1393; GFX9-G-NEXT: v_addc_co_u32_e32 v23, vcc, -1, v23, vcc 1394; GFX9-G-NEXT: v_lshlrev_b64 v[12:13], 1, v[12:13] 1395; GFX9-G-NEXT: v_or_b32_e32 v0, v20, v22 1396; GFX9-G-NEXT: v_or_b32_e32 v1, v21, v23 1397; GFX9-G-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] 1398; GFX9-G-NEXT: v_or_b32_e32 v12, v12, v8 1399; GFX9-G-NEXT: v_and_b32_e32 v8, 1, v28 1400; GFX9-G-NEXT: v_mov_b32_e32 v0, v8 1401; GFX9-G-NEXT: s_or_b64 s[8:9], vcc, s[8:9] 1402; GFX9-G-NEXT: v_mov_b32_e32 v1, v9 1403; GFX9-G-NEXT: s_andn2_b64 exec, exec, s[8:9] 1404; GFX9-G-NEXT: s_cbranch_execnz .LBB0_3 1405; GFX9-G-NEXT: ; %bb.4: ; %Flow 1406; GFX9-G-NEXT: s_or_b64 exec, exec, s[8:9] 1407; GFX9-G-NEXT: .LBB0_5: ; %Flow2 1408; GFX9-G-NEXT: s_or_b64 exec, exec, s[12:13] 1409; GFX9-G-NEXT: v_lshlrev_b64 v[2:3], 1, v[6:7] 1410; GFX9-G-NEXT: v_lshlrev_b64 v[12:13], 1, v[12:13] 1411; GFX9-G-NEXT: v_lshrrev_b32_e32 v4, 31, v7 1412; GFX9-G-NEXT: v_or_b32_e32 v12, v12, v4 1413; GFX9-G-NEXT: v_or_b32_e32 v6, v0, v2 1414; GFX9-G-NEXT: v_or_b32_e32 v7, v1, v3 1415; GFX9-G-NEXT: .LBB0_6: ; %Flow3 1416; GFX9-G-NEXT: s_or_b64 exec, exec, s[6:7] 1417; GFX9-G-NEXT: v_xor_b32_e32 v3, v17, v16 1418; GFX9-G-NEXT: v_xor_b32_e32 v0, v6, v3 
1419; GFX9-G-NEXT: v_xor_b32_e32 v1, v7, v3 1420; GFX9-G-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v3 1421; GFX9-G-NEXT: v_xor_b32_e32 v2, v12, v3 1422; GFX9-G-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc 1423; GFX9-G-NEXT: v_xor_b32_e32 v4, v13, v3 1424; GFX9-G-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v3, vcc 1425; GFX9-G-NEXT: v_subb_co_u32_e32 v3, vcc, v4, v3, vcc 1426; GFX9-G-NEXT: s_setpc_b64 s[30:31] 1427; 1428; GFX9-G-O0-LABEL: v_sdiv_i128_vv: 1429; GFX9-G-O0: ; %bb.0: ; %_udiv-special-cases 1430; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1431; GFX9-G-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 1432; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill 1433; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5] 1434; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v1 1435; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v2 1436; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v3 1437; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec 1438; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v10 1439; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v9 1440; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v8 1441; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill 1442; GFX9-G-O0-NEXT: s_nop 0 1443; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill 1444; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill 1445; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill 1446; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v4 1447; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v5 1448; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v6 1449; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload 1450; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload 1451; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload 1452; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:96 ; 
4-byte Folded Reload 1453; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13_vgpr14_vgpr15 killed $exec 1454; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v1 1455; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v0 1456; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v7 1457; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0 1458; GFX9-G-O0-NEXT: s_mov_b64 s[8:9], 0x7f 1459; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr3_vgpr4 killed $exec 1460; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) 1461; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v5 1462; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v6 1463; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec 1464; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v1 1465; GFX9-G-O0-NEXT: s_mov_b32 s6, 31 1466; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s6 1467; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v11, v2, v7 1468; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec 1469; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec 1470; GFX9-G-O0-NEXT: s_mov_b32 s6, 31 1471; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6 1472; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v9, v0, v1 1473; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr12_vgpr13 killed $exec 1474; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v14 1475; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v15 1476; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec 1477; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v1 1478; GFX9-G-O0-NEXT: s_mov_b32 s6, 31 1479; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s6 1480; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v10, v2, v7 1481; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec 1482; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec 1483; GFX9-G-O0-NEXT: s_mov_b32 s6, 31 1484; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6 1485; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v8, v0, v1 1486; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v3 1487; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v4 1488; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v5 1489; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v6 1490; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, 
v1 1491; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v2 1492; GFX9-G-O0-NEXT: v_xor_b32_e64 v0, v11, v0 1493; GFX9-G-O0-NEXT: v_xor_b32_e64 v1, v11, v1 1494; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v4 1495; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v5 1496; GFX9-G-O0-NEXT: v_xor_b32_e64 v3, v9, v3 1497; GFX9-G-O0-NEXT: v_xor_b32_e64 v2, v9, v2 1498; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v0, s[6:7], v0, v11 1499; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill 1500; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v1, s[6:7], v1, v11, s[6:7] 1501; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill 1502; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v5, s[6:7], v3, v9, s[6:7] 1503; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill 1504; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v2, s[6:7], v2, v9, s[6:7] 1505; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill 1506; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v12 1507; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v13 1508; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v14 1509; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v15 1510; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v6 1511; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v7 1512; GFX9-G-O0-NEXT: v_xor_b32_e64 v7, v10, v4 1513; GFX9-G-O0-NEXT: v_xor_b32_e64 v4, v10, v3 1514; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v12 1515; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v13 1516; GFX9-G-O0-NEXT: v_xor_b32_e64 v6, v8, v6 1517; GFX9-G-O0-NEXT: v_xor_b32_e64 v3, v8, v3 1518; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v7, s[6:7], v7, v10 1519; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill 1520; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v4, s[6:7], v4, v10, s[6:7] 1521; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill 1522; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v6, s[6:7], v6, v8, s[6:7] 1523; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill 1524; GFX9-G-O0-NEXT: 
v_subb_co_u32_e64 v3, s[6:7], v3, v8, s[6:7] 1525; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill 1526; GFX9-G-O0-NEXT: v_xor_b32_e64 v12, v10, v11 1527; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill 1528; GFX9-G-O0-NEXT: v_xor_b32_e64 v10, v10, v11 1529; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill 1530; GFX9-G-O0-NEXT: v_xor_b32_e64 v10, v8, v9 1531; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill 1532; GFX9-G-O0-NEXT: v_xor_b32_e64 v8, v8, v9 1533; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill 1534; GFX9-G-O0-NEXT: v_or_b32_e64 v8, v7, v6 1535; GFX9-G-O0-NEXT: v_or_b32_e64 v10, v4, v3 1536; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 1537; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10 1538; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s5 1539; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s4 1540; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[6:7], v[8:9], v[10:11] 1541; GFX9-G-O0-NEXT: v_or_b32_e64 v8, v0, v5 1542; GFX9-G-O0-NEXT: v_or_b32_e64 v10, v1, v2 1543; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 1544; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10 1545; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s5 1546; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s4 1547; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], v[8:9], v[10:11] 1548; GFX9-G-O0-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] 1549; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v6 1550; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v3 1551; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s5 1552; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s4 1553; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], v[8:9], v[10:11] 1554; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v4, v4 1555; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v7, v7 1556; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, 32 1557; GFX9-G-O0-NEXT: v_add_u32_e64 v7, v7, v8 1558; GFX9-G-O0-NEXT: v_min_u32_e64 v4, v4, v7 1559; 
GFX9-G-O0-NEXT: s_mov_b32 s12, 64 1560; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s12 1561; GFX9-G-O0-NEXT: v_add_u32_e64 v4, v4, v7 1562; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v3, v3 1563; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v6, v6 1564; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 32 1565; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v6, v7 1566; GFX9-G-O0-NEXT: v_min_u32_e64 v3, v3, v6 1567; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[10:11] 1568; GFX9-G-O0-NEXT: s_mov_b32 s16, 0 1569; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v5 1570; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v2 1571; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s5 1572; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s4 1573; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], v[6:7], v[8:9] 1574; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v4, v1 1575; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v6, v0 1576; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 32 1577; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v6, v7 1578; GFX9-G-O0-NEXT: v_min_u32_e64 v4, v4, v6 1579; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s12 1580; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v4, v6 1581; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v4, v2 1582; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v7, v5 1583; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, 32 1584; GFX9-G-O0-NEXT: v_add_u32_e64 v7, v7, v8 1585; GFX9-G-O0-NEXT: v_min_u32_e64 v4, v4, v7 1586; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v6, s[10:11] 1587; GFX9-G-O0-NEXT: s_mov_b32 s15, 0 1588; GFX9-G-O0-NEXT: s_mov_b32 s13, 0 1589; GFX9-G-O0-NEXT: s_mov_b32 s14, 0 1590; GFX9-G-O0-NEXT: s_mov_b32 s12, 0 1591; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v6, s[10:11], v3, v4 1592; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill 1593; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s16 1594; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s16 1595; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v3, s[10:11], v3, v4, s[10:11] 1596; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill 1597; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s15 1598; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s14 1599; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v8, s[10:11], v4, 
v7, s[10:11] 1600; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill 1601; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s13 1602; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s12 1603; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v7, s[10:11], v4, v7, s[10:11] 1604; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill 1605; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v6 1606; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v3 1607; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v8 1608; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v7 1609; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s9 1610; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s8 1611; GFX9-G-O0-NEXT: v_cmp_gt_u64_e64 s[12:13], v[11:12], v[13:14] 1612; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s5 1613; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s4 1614; GFX9-G-O0-NEXT: v_cmp_gt_u64_e64 s[10:11], v[9:10], v[11:12] 1615; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s5 1616; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s4 1617; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[9:10], v[11:12] 1618; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, 1 1619; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0 1620; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v4, v9, s[12:13] 1621; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, 1 1622; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0 1623; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[10:11] 1624; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v4, v9, s[8:9] 1625; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, 1 1626; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0 1627; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[6:7] 1628; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v9 1629; GFX9-G-O0-NEXT: s_mov_b32 s7, 0x7f 1630; GFX9-G-O0-NEXT: s_mov_b32 s6, 0 1631; GFX9-G-O0-NEXT: v_xor_b32_e64 v6, v6, s7 1632; GFX9-G-O0-NEXT: v_xor_b32_e64 v3, v3, s6 1633; GFX9-G-O0-NEXT: v_or_b32_e64 v6, v6, v8 1634; GFX9-G-O0-NEXT: v_or_b32_e64 v3, v3, v7 1635; GFX9-G-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec 1636; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v3 1637; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s5 1638; GFX9-G-O0-NEXT: 
v_mov_b32_e32 v8, s4 1639; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[6:7], v[8:9] 1640; GFX9-G-O0-NEXT: v_and_b32_e32 v3, 1, v4 1641; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v3 1642; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 0 1643; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0 1644; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[6:7] 1645; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v1, v3, s[6:7] 1646; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 1647; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v3 1648; GFX9-G-O0-NEXT: v_and_b32_e32 v3, 1, v4 1649; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v3 1650; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 0 1651; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0 1652; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[6:7] 1653; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[6:7] 1654; GFX9-G-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec 1655; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v2 1656; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec 1657; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v5 1658; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v6 1659; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 1 1660; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, 0 1661; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] 1662; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v5 1663; GFX9-G-O0-NEXT: v_and_b32_e32 v4, 1, v4 1664; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v4 1665; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], -1 1666; GFX9-G-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] 1667; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill 1668; GFX9-G-O0-NEXT: s_nop 0 1669; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill 1670; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill 1671; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill 1672; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], exec 1673; GFX9-G-O0-NEXT: ; 
implicit-def: $vgpr34 : SGPR spill to VGPR lane 1674; GFX9-G-O0-NEXT: v_writelane_b32 v34, s4, 0 1675; GFX9-G-O0-NEXT: v_writelane_b32 v34, s5, 1 1676; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 1677; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill 1678; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] 1679; GFX9-G-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] 1680; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5] 1681; GFX9-G-O0-NEXT: s_cbranch_execz .LBB0_3 1682; GFX9-G-O0-NEXT: s_branch .LBB0_8 1683; GFX9-G-O0-NEXT: .LBB0_1: ; %Flow 1684; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 1685; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload 1686; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] 1687; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) 1688; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 2 1689; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 3 1690; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5] 1691; GFX9-G-O0-NEXT: ; %bb.2: ; %Flow 1692; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload 1693; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload 1694; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload 1695; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload 1696; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload 1697; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload 1698; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload 1699; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload 1700; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) 1701; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill 1702; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) 1703; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill 
1704; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) 1705; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill 1706; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) 1707; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill 1708; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) 1709; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill 1710; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) 1711; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill 1712; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) 1713; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill 1714; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) 1715; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill 1716; GFX9-G-O0-NEXT: s_branch .LBB0_5 1717; GFX9-G-O0-NEXT: .LBB0_3: ; %Flow2 1718; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 1719; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload 1720; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] 1721; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) 1722; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 0 1723; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 1 1724; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5] 1725; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload 1726; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload 1727; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload 1728; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload 1729; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) 1730; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill 1731; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) 1732; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill 1733; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) 1734; GFX9-G-O0-NEXT: buffer_store_dword v2, 
off, s[0:3], s32 offset:172 ; 4-byte Folded Spill 1735; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) 1736; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill 1737; GFX9-G-O0-NEXT: s_branch .LBB0_9 1738; GFX9-G-O0-NEXT: .LBB0_4: ; %udiv-loop-exit 1739; GFX9-G-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload 1740; GFX9-G-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload 1741; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload 1742; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload 1743; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload 1744; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload 1745; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload 1746; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload 1747; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2) 1748; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v4 1749; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v5 1750; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) 1751; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v6 1752; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v7 1753; GFX9-G-O0-NEXT: s_mov_b32 s4, 1 1754; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s4 1755; GFX9-G-O0-NEXT: v_lshlrev_b64 v[10:11], v0, v[2:3] 1756; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s4 1757; GFX9-G-O0-NEXT: v_lshlrev_b64 v[0:1], v0, v[4:5] 1758; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr2 killed $exec 1759; GFX9-G-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr2_vgpr3 killed $exec 1760; GFX9-G-O0-NEXT: s_mov_b32 s4, 31 1761; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s4 1762; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v6, v2, v3 1763; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0 1764; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v0 1765; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v1 1766; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v14 1767; 
GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v15 1768; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v16 1769; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v17 1770; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v12 1771; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v13 1772; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v10 1773; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v11 1774; GFX9-G-O0-NEXT: v_or_b32_e64 v0, v0, v7 1775; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v1, v5 1776; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 1777; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v5 1778; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v8 1779; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v9 1780; GFX9-G-O0-NEXT: v_or3_b32 v4, v4, v6, v7 1781; GFX9-G-O0-NEXT: v_or3_b32 v2, v2, v3, v5 1782; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 1783; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v2 1784; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec 1785; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v4 1786; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v5 1787; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill 1788; GFX9-G-O0-NEXT: s_nop 0 1789; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill 1790; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill 1791; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill 1792; GFX9-G-O0-NEXT: s_branch .LBB0_3 1793; GFX9-G-O0-NEXT: .LBB0_5: ; %Flow1 1794; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 1795; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload 1796; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] 1797; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) 1798; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 4 1799; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 5 1800; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5] 1801; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload 1802; GFX9-G-O0-NEXT: 
buffer_load_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload 1803; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload 1804; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload 1805; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload 1806; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload 1807; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload 1808; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload 1809; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) 1810; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill 1811; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) 1812; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill 1813; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) 1814; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill 1815; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) 1816; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill 1817; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill 1818; GFX9-G-O0-NEXT: s_nop 0 1819; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill 1820; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill 1821; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill 1822; GFX9-G-O0-NEXT: s_branch .LBB0_4 1823; GFX9-G-O0-NEXT: .LBB0_6: ; %udiv-do-while 1824; GFX9-G-O0-NEXT: ; =>This Inner Loop Header: Depth=1 1825; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 1826; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload 1827; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] 1828; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) 1829; GFX9-G-O0-NEXT: 
v_readlane_b32 s6, v34, 6 1830; GFX9-G-O0-NEXT: v_readlane_b32 s7, v34, 7 1831; GFX9-G-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload 1832; GFX9-G-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload 1833; GFX9-G-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload 1834; GFX9-G-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload 1835; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload 1836; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload 1837; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload 1838; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload 1839; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload 1840; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload 1841; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload 1842; GFX9-G-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload 1843; GFX9-G-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload 1844; GFX9-G-O0-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload 1845; GFX9-G-O0-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload 1846; GFX9-G-O0-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload 1847; GFX9-G-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload 1848; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload 1849; GFX9-G-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload 1850; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload 1851; 
GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload 1852; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload 1853; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload 1854; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload 1855; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0 1856; GFX9-G-O0-NEXT: s_waitcnt vmcnt(18) 1857; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v2 1858; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v3 1859; GFX9-G-O0-NEXT: s_waitcnt vmcnt(16) 1860; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v4 1861; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v5 1862; GFX9-G-O0-NEXT: s_mov_b32 s8, 1 1863; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8 1864; GFX9-G-O0-NEXT: v_lshlrev_b64 v[14:15], v2, v[0:1] 1865; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8 1866; GFX9-G-O0-NEXT: v_lshlrev_b64 v[4:5], v2, v[3:4] 1867; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec 1868; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec 1869; GFX9-G-O0-NEXT: s_mov_b32 s9, 31 1870; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s9 1871; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v3, v0, v1 1872; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, 0 1873; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v4 1874; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v5 1875; GFX9-G-O0-NEXT: v_or_b32_e64 v7, v2, v3 1876; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v0, v1 1877; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr22_vgpr23 killed $exec 1878; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v24 1879; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v25 1880; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec 1881; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec 1882; GFX9-G-O0-NEXT: s_mov_b32 s9, 31 1883; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s9 1884; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v3, v0, v1 1885; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, 0 1886; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v14 1887; GFX9-G-O0-NEXT: 
v_mov_b32_e32 v0, v15 1888; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v2, v3 1889; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v0, v1 1890; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v22 1891; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v23 1892; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v24 1893; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v25 1894; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8 1895; GFX9-G-O0-NEXT: v_lshlrev_b64 v[26:27], v0, v[2:3] 1896; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8 1897; GFX9-G-O0-NEXT: v_lshlrev_b64 v[0:1], v0, v[14:15] 1898; GFX9-G-O0-NEXT: ; kill: def $vgpr14 killed $vgpr2 killed $exec 1899; GFX9-G-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr2_vgpr3 killed $exec 1900; GFX9-G-O0-NEXT: s_mov_b32 s8, 31 1901; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8 1902; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v22, v2, v3 1903; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0 1904; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v0 1905; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v1 1906; GFX9-G-O0-NEXT: s_waitcnt vmcnt(10) 1907; GFX9-G-O0-NEXT: v_mov_b32_e32 v28, v30 1908; GFX9-G-O0-NEXT: v_mov_b32_e32 v29, v31 1909; GFX9-G-O0-NEXT: s_waitcnt vmcnt(8) 1910; GFX9-G-O0-NEXT: v_mov_b32_e32 v24, v32 1911; GFX9-G-O0-NEXT: v_mov_b32_e32 v25, v33 1912; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v28 1913; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v29 1914; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v26 1915; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v27 1916; GFX9-G-O0-NEXT: v_or_b32_e64 v0, v0, v23 1917; GFX9-G-O0-NEXT: v_or_b32_e64 v15, v1, v15 1918; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 1919; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v15 1920; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v24 1921; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v25 1922; GFX9-G-O0-NEXT: v_or3_b32 v14, v14, v22, v23 1923; GFX9-G-O0-NEXT: v_or3_b32 v2, v2, v3, v15 1924; GFX9-G-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec 1925; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v2 1926; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 
killed $exec 1927; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v14 1928; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v15 1929; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) 1930; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v13, s[8:9], v13, v4 1931; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v12, s[8:9], v12, v9, s[8:9] 1932; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v10, s[8:9], v10, v7, s[8:9] 1933; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v12, s[8:9], v6, v5, s[8:9] 1934; GFX9-G-O0-NEXT: s_mov_b32 s8, 31 1935; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s8 1936; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v10, v6, v12 1937; GFX9-G-O0-NEXT: s_mov_b32 s8, 31 1938; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s8 1939; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v6, v6, v12 1940; GFX9-G-O0-NEXT: s_mov_b32 s9, 1 1941; GFX9-G-O0-NEXT: s_mov_b32 s8, 0 1942; GFX9-G-O0-NEXT: v_and_b32_e64 v12, v10, s9 1943; GFX9-G-O0-NEXT: v_and_b32_e64 v14, v10, s8 1944; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec 1945; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v14 1946; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, s5 1947; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, s4 1948; GFX9-G-O0-NEXT: ; kill: def $vgpr12_vgpr13 killed $vgpr12_vgpr13 def $vgpr12_vgpr13_vgpr14_vgpr15 killed $exec 1949; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v22 1950; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v23 1951; GFX9-G-O0-NEXT: v_and_b32_e64 v11, v10, v11 1952; GFX9-G-O0-NEXT: v_and_b32_e64 v10, v10, v21 1953; GFX9-G-O0-NEXT: v_and_b32_e64 v8, v6, v8 1954; GFX9-G-O0-NEXT: v_and_b32_e64 v6, v6, v20 1955; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v4, s[8:9], v4, v11 1956; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v10, s[8:9], v9, v10, s[8:9] 1957; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v9, s[8:9], v7, v8, s[8:9] 1958; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v8, s[8:9], v5, v6, s[8:9] 1959; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec 1960; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v10 1961; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v9 1962; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v8 1963; GFX9-G-O0-NEXT: v_mov_b32_e32 
v11, v16 1964; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v17 1965; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v18 1966; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v19 1967; GFX9-G-O0-NEXT: s_mov_b32 s8, -1 1968; GFX9-G-O0-NEXT: s_mov_b32 s12, -1 1969; GFX9-G-O0-NEXT: s_mov_b32 s11, -1 1970; GFX9-G-O0-NEXT: s_mov_b32 s10, -1 1971; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, s8 1972; GFX9-G-O0-NEXT: v_add_co_u32_e64 v16, s[8:9], v11, v16 1973; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s12 1974; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v17, s[8:9], v10, v11, s[8:9] 1975; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s11 1976; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v19, s[8:9], v9, v10, s[8:9] 1977; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s10 1978; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v18, s[8:9], v8, v9, s[8:9] 1979; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v16 1980; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v17 1981; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v19 1982; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v18 1983; GFX9-G-O0-NEXT: v_or_b32_e64 v16, v16, v19 1984; GFX9-G-O0-NEXT: v_or_b32_e64 v18, v17, v18 1985; GFX9-G-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec 1986; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v18 1987; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, s5 1988; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, s4 1989; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[16:17], v[18:19] 1990; GFX9-G-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] 1991; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v3 1992; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v2 1993; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v1 1994; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v0 1995; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill 1996; GFX9-G-O0-NEXT: s_nop 0 1997; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill 1998; GFX9-G-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill 1999; GFX9-G-O0-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill 2000; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v15 
2001; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v14 2002; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v13 2003; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v12 2004; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill 2005; GFX9-G-O0-NEXT: s_nop 0 2006; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill 2007; GFX9-G-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill 2008; GFX9-G-O0-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill 2009; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[4:5] 2010; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 2 2011; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 3 2012; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[4:5] 2013; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 6 2014; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 7 2015; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 2016; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill 2017; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] 2018; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill 2019; GFX9-G-O0-NEXT: s_nop 0 2020; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill 2021; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill 2022; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill 2023; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill 2024; GFX9-G-O0-NEXT: s_nop 0 2025; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill 2026; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill 2027; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill 2028; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill 2029; GFX9-G-O0-NEXT: s_nop 0 2030; GFX9-G-O0-NEXT: 
buffer_store_dword v5, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill 2031; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill 2032; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill 2033; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill 2034; GFX9-G-O0-NEXT: s_nop 0 2035; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill 2036; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill 2037; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill 2038; GFX9-G-O0-NEXT: s_andn2_b64 exec, exec, s[4:5] 2039; GFX9-G-O0-NEXT: s_cbranch_execnz .LBB0_6 2040; GFX9-G-O0-NEXT: s_branch .LBB0_1 2041; GFX9-G-O0-NEXT: .LBB0_7: ; %udiv-preheader 2042; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 2043; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload 2044; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] 2045; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload 2046; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload 2047; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload 2048; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload 2049; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload 2050; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload 2051; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload 2052; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload 2053; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload 2054; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload 
2055; GFX9-G-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload 2056; GFX9-G-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload 2057; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload 2058; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload 2059; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload 2060; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload 2061; GFX9-G-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload 2062; GFX9-G-O0-NEXT: s_mov_b32 s4, 64 2063; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) 2064; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v17 2065; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v16 2066; GFX9-G-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec 2067; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1) 2068; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v4 2069; GFX9-G-O0-NEXT: s_mov_b32 s5, 0xffffffc0 2070; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s5 2071; GFX9-G-O0-NEXT: v_add_u32_e64 v4, v18, v4 2072; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s4 2073; GFX9-G-O0-NEXT: v_sub_u32_e64 v5, v5, v18 2074; GFX9-G-O0-NEXT: s_mov_b32 s6, 0 2075; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s4 2076; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v18, v6 2077; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s6 2078; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v18, v6 2079; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) 2080; GFX9-G-O0-NEXT: v_lshrrev_b64 v[6:7], v18, v[20:21] 2081; GFX9-G-O0-NEXT: v_lshrrev_b64 v[25:26], v18, v[22:23] 2082; GFX9-G-O0-NEXT: v_lshlrev_b64 v[23:24], v5, v[20:21] 2083; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v25 2084; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v26 2085; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v23 2086; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v24 2087; GFX9-G-O0-NEXT: v_or_b32_e64 v19, v19, v22 2088; GFX9-G-O0-NEXT: v_or_b32_e64 v18, v5, v18 2089; GFX9-G-O0-NEXT: s_mov_b64 
s[8:9], 0 2090; GFX9-G-O0-NEXT: v_lshrrev_b64 v[20:21], v4, v[20:21] 2091; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v20 2092; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v21 2093; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v19, s[4:5] 2094; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v18, s[4:5] 2095; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v17, s[6:7] 2096; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v16, v5, v16, s[6:7] 2097; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 2098; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v16 2099; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v6 2100; GFX9-G-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec 2101; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, 0 2102; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 0 2103; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[4:5] 2104; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5] 2105; GFX9-G-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec 2106; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v6 2107; GFX9-G-O0-NEXT: ; kill: def $vgpr4_vgpr5 killed $vgpr4_vgpr5 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec 2108; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v16 2109; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v17 2110; GFX9-G-O0-NEXT: s_mov_b32 s4, -1 2111; GFX9-G-O0-NEXT: s_mov_b32 s10, -1 2112; GFX9-G-O0-NEXT: s_mov_b32 s7, -1 2113; GFX9-G-O0-NEXT: s_mov_b32 s6, -1 2114; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, s4 2115; GFX9-G-O0-NEXT: v_add_co_u32_e64 v15, s[4:5], v15, v16 2116; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill 2117; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s10 2118; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v14, s[4:5], v14, v15, s[4:5] 2119; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill 2120; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s7 2121; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v13, s[4:5], v13, v14, s[4:5] 2122; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill 2123; GFX9-G-O0-NEXT: 
v_mov_b32_e32 v13, s6 2124; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v12, s[4:5], v12, v13, s[4:5] 2125; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill 2126; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], s[8:9] 2127; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[8:9] 2128; GFX9-G-O0-NEXT: v_writelane_b32 v34, s8, 6 2129; GFX9-G-O0-NEXT: v_writelane_b32 v34, s9, 7 2130; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 2131; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill 2132; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] 2133; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s7 2134; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s6 2135; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s5 2136; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s4 2137; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill 2138; GFX9-G-O0-NEXT: s_nop 0 2139; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill 2140; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill 2141; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill 2142; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill 2143; GFX9-G-O0-NEXT: s_nop 0 2144; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill 2145; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill 2146; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill 2147; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill 2148; GFX9-G-O0-NEXT: s_nop 0 2149; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill 2150; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill 2151; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill 2152; GFX9-G-O0-NEXT: 
buffer_store_dword v0, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill 2153; GFX9-G-O0-NEXT: s_nop 0 2154; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill 2155; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill 2156; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill 2157; GFX9-G-O0-NEXT: s_branch .LBB0_6 2158; GFX9-G-O0-NEXT: .LBB0_8: ; %udiv-bb1 2159; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 2160; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload 2161; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] 2162; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload 2163; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload 2164; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload 2165; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload 2166; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload 2167; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload 2168; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload 2169; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload 2170; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0 2171; GFX9-G-O0-NEXT: s_mov_b32 s6, 1 2172; GFX9-G-O0-NEXT: s_mov_b32 s10, 0 2173; GFX9-G-O0-NEXT: s_mov_b32 s9, 0 2174; GFX9-G-O0-NEXT: s_mov_b32 s8, 0 2175; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s6 2176; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) 2177; GFX9-G-O0-NEXT: v_add_co_u32_e64 v4, s[6:7], v2, v4 2178; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill 2179; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s10 2180; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1) 2181; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v5, s[6:7], v5, v7, s[6:7] 2182; 
GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s9 2183; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v7, s[6:7], v6, v7, s[6:7] 2184; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s8 2185; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v6, s[6:7], v1, v6, s[6:7] 2186; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v4 2187; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v5 2188; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v7 2189; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v6 2190; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill 2191; GFX9-G-O0-NEXT: s_nop 0 2192; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill 2193; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill 2194; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill 2195; GFX9-G-O0-NEXT: s_mov_b32 s6, 0x7f 2196; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s6 2197; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v8, s[6:7], v1, v2 2198; GFX9-G-O0-NEXT: s_mov_b32 s7, 64 2199; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec 2200; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v0 2201; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v9 2202; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v3 2203; GFX9-G-O0-NEXT: s_mov_b32 s6, 0xffffffc0 2204; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6 2205; GFX9-G-O0-NEXT: v_add_u32_e64 v2, v8, v0 2206; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7 2207; GFX9-G-O0-NEXT: v_sub_u32_e64 v14, v0, v8 2208; GFX9-G-O0-NEXT: s_mov_b32 s6, 0 2209; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7 2210; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[8:9], v8, v0 2211; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6 2212; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v8, v0 2213; GFX9-G-O0-NEXT: v_lshlrev_b64 v[0:1], v8, v[12:13] 2214; GFX9-G-O0-NEXT: v_lshrrev_b64 v[17:18], v14, v[12:13] 2215; GFX9-G-O0-NEXT: v_lshlrev_b64 v[15:16], v8, v[10:11] 2216; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v17 2217; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v18 2218; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v15 2219; GFX9-G-O0-NEXT: 
v_mov_b32_e32 v10, v16 2220; GFX9-G-O0-NEXT: v_or_b32_e64 v11, v11, v14 2221; GFX9-G-O0-NEXT: v_or_b32_e64 v10, v8, v10 2222; GFX9-G-O0-NEXT: v_lshlrev_b64 v[12:13], v2, v[12:13] 2223; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v0 2224; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v1 2225; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, 0 2226; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, 0 2227; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[8:9] 2228; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[8:9] 2229; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 2230; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v2 2231; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v12 2232; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v13 2233; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v8, v8, v11, s[8:9] 2234; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[8:9] 2235; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[6:7] 2236; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[6:7] 2237; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 2238; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v2 2239; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec 2240; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v8 2241; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v9 2242; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill 2243; GFX9-G-O0-NEXT: s_nop 0 2244; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill 2245; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill 2246; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill 2247; GFX9-G-O0-NEXT: s_mov_b64 s[8:9], s[4:5] 2248; GFX9-G-O0-NEXT: s_mov_b64 s[10:11], s[4:5] 2249; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v7 2250; GFX9-G-O0-NEXT: v_or_b32_e64 v6, v5, v6 2251; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 2252; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v6 2253; GFX9-G-O0-NEXT: 
v_mov_b32_e32 v7, s5 2254; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s4 2255; GFX9-G-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[4:5], v[6:7] 2256; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill 2257; GFX9-G-O0-NEXT: s_nop 0 2258; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill 2259; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill 2260; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill 2261; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8 2262; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s9 2263; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s10 2264; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s11 2265; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill 2266; GFX9-G-O0-NEXT: s_nop 0 2267; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill 2268; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill 2269; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill 2270; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], exec 2271; GFX9-G-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] 2272; GFX9-G-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] 2273; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 4 2274; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 5 2275; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 2276; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill 2277; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] 2278; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5] 2279; GFX9-G-O0-NEXT: s_cbranch_execz .LBB0_5 2280; GFX9-G-O0-NEXT: s_branch .LBB0_7 2281; GFX9-G-O0-NEXT: .LBB0_9: ; %udiv-end 2282; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload 2283; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload 2284; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:44 ; 4-byte 
Folded Reload 2285; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload 2286; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload 2287; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload 2288; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload 2289; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload 2290; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2) 2291; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v8 2292; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v9 2293; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) 2294; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v10 2295; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v11 2296; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v1 2297; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v2 2298; GFX9-G-O0-NEXT: v_xor_b32_e64 v0, v0, v7 2299; GFX9-G-O0-NEXT: v_xor_b32_e64 v1, v1, v6 2300; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v8 2301; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v9 2302; GFX9-G-O0-NEXT: v_xor_b32_e64 v2, v2, v5 2303; GFX9-G-O0-NEXT: v_xor_b32_e64 v3, v3, v4 2304; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v0, s[4:5], v0, v7 2305; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v1, s[4:5], v1, v6, s[4:5] 2306; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v2, s[4:5], v2, v5, s[4:5] 2307; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v3, s[4:5], v3, v4, s[4:5] 2308; GFX9-G-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 2309; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload 2310; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5] 2311; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) 2312; GFX9-G-O0-NEXT: s_setpc_b64 s[30:31] 2313 %div = sdiv i128 %lhs, %rhs 2314 ret i128 %div 2315} 2316 2317define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { 2318; GFX9-LABEL: v_udiv_i128_vv: 2319; GFX9: ; %bb.0: ; %_udiv-special-cases 2320; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2321; GFX9-NEXT: v_or_b32_e32 v9, v5, v7 2322; GFX9-NEXT: v_or_b32_e32 v8, v4, v6 2323; GFX9-NEXT: 
v_cmp_eq_u64_e32 vcc, 0, v[8:9] 2324; GFX9-NEXT: v_or_b32_e32 v9, v1, v3 2325; GFX9-NEXT: v_or_b32_e32 v8, v0, v2 2326; GFX9-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[8:9] 2327; GFX9-NEXT: v_ffbh_u32_e32 v8, v6 2328; GFX9-NEXT: v_add_u32_e32 v8, 32, v8 2329; GFX9-NEXT: v_ffbh_u32_e32 v9, v7 2330; GFX9-NEXT: v_min_u32_e32 v8, v8, v9 2331; GFX9-NEXT: v_ffbh_u32_e32 v9, v4 2332; GFX9-NEXT: v_add_u32_e32 v9, 32, v9 2333; GFX9-NEXT: v_ffbh_u32_e32 v10, v5 2334; GFX9-NEXT: v_min_u32_e32 v9, v9, v10 2335; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2336; GFX9-NEXT: v_add_co_u32_e32 v9, vcc, 64, v9 2337; GFX9-NEXT: v_addc_co_u32_e64 v10, s[6:7], 0, 0, vcc 2338; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] 2339; GFX9-NEXT: v_ffbh_u32_e32 v11, v3 2340; GFX9-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc 2341; GFX9-NEXT: v_ffbh_u32_e32 v9, v2 2342; GFX9-NEXT: v_add_u32_e32 v9, 32, v9 2343; GFX9-NEXT: v_min_u32_e32 v9, v9, v11 2344; GFX9-NEXT: v_ffbh_u32_e32 v11, v0 2345; GFX9-NEXT: v_add_u32_e32 v11, 32, v11 2346; GFX9-NEXT: v_ffbh_u32_e32 v12, v1 2347; GFX9-NEXT: v_min_u32_e32 v11, v11, v12 2348; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, 0, vcc 2349; GFX9-NEXT: v_add_co_u32_e32 v11, vcc, 64, v11 2350; GFX9-NEXT: v_addc_co_u32_e64 v12, s[6:7], 0, 0, vcc 2351; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] 2352; GFX9-NEXT: s_mov_b64 s[6:7], 0x7f 2353; GFX9-NEXT: v_cndmask_b32_e32 v9, v11, v9, vcc 2354; GFX9-NEXT: v_cndmask_b32_e64 v13, v12, 0, vcc 2355; GFX9-NEXT: v_sub_co_u32_e32 v12, vcc, v8, v9 2356; GFX9-NEXT: v_subb_co_u32_e32 v13, vcc, v10, v13, vcc 2357; GFX9-NEXT: v_mov_b32_e32 v8, 0 2358; GFX9-NEXT: v_subbrev_co_u32_e32 v14, vcc, 0, v8, vcc 2359; GFX9-NEXT: v_subbrev_co_u32_e32 v15, vcc, 0, v8, vcc 2360; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[12:13] 2361; GFX9-NEXT: v_or_b32_e32 v10, v13, v15 2362; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 2363; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15] 2364; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 2365; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] 
2366; GFX9-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc 2367; GFX9-NEXT: v_and_b32_e32 v8, 1, v8 2368; GFX9-NEXT: v_xor_b32_e32 v9, 0x7f, v12 2369; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 2370; GFX9-NEXT: v_or_b32_e32 v9, v9, v14 2371; GFX9-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2372; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[9:10] 2373; GFX9-NEXT: s_xor_b64 s[6:7], s[4:5], -1 2374; GFX9-NEXT: v_cndmask_b32_e64 v8, v3, 0, s[4:5] 2375; GFX9-NEXT: v_cndmask_b32_e64 v9, v2, 0, s[4:5] 2376; GFX9-NEXT: v_cndmask_b32_e64 v10, v1, 0, s[4:5] 2377; GFX9-NEXT: v_cndmask_b32_e64 v11, v0, 0, s[4:5] 2378; GFX9-NEXT: s_and_b64 s[4:5], s[6:7], vcc 2379; GFX9-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] 2380; GFX9-NEXT: s_cbranch_execz .LBB1_6 2381; GFX9-NEXT: ; %bb.1: ; %udiv-bb1 2382; GFX9-NEXT: v_add_co_u32_e32 v18, vcc, 1, v12 2383; GFX9-NEXT: v_addc_co_u32_e32 v19, vcc, 0, v13, vcc 2384; GFX9-NEXT: v_addc_co_u32_e32 v20, vcc, 0, v14, vcc 2385; GFX9-NEXT: v_addc_co_u32_e32 v21, vcc, 0, v15, vcc 2386; GFX9-NEXT: v_sub_u32_e32 v15, 0x7f, v12 2387; GFX9-NEXT: v_or_b32_e32 v9, v19, v21 2388; GFX9-NEXT: v_or_b32_e32 v8, v18, v20 2389; GFX9-NEXT: v_sub_u32_e32 v13, 64, v15 2390; GFX9-NEXT: v_lshlrev_b64 v[10:11], v15, v[2:3] 2391; GFX9-NEXT: v_lshrrev_b64 v[13:14], v13, v[0:1] 2392; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] 2393; GFX9-NEXT: v_sub_u32_e32 v8, 63, v12 2394; GFX9-NEXT: v_lshlrev_b64 v[8:9], v8, v[0:1] 2395; GFX9-NEXT: v_or_b32_e32 v11, v11, v14 2396; GFX9-NEXT: v_or_b32_e32 v10, v10, v13 2397; GFX9-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v15 2398; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v11, s[4:5] 2399; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, v10, s[4:5] 2400; GFX9-NEXT: v_lshlrev_b64 v[10:11], v15, v[0:1] 2401; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v15 2402; GFX9-NEXT: v_mov_b32_e32 v12, 0 2403; GFX9-NEXT: v_mov_b32_e32 v14, 0 2404; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v3, s[6:7] 2405; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, v2, s[6:7] 2406; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, v11, s[4:5] 2407; 
GFX9-NEXT: v_mov_b32_e32 v13, 0 2408; GFX9-NEXT: v_mov_b32_e32 v15, 0 2409; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, v10, s[4:5] 2410; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc 2411; GFX9-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 2412; GFX9-NEXT: s_cbranch_execz .LBB1_5 2413; GFX9-NEXT: ; %bb.2: ; %udiv-preheader 2414; GFX9-NEXT: v_sub_u32_e32 v14, 64, v18 2415; GFX9-NEXT: v_lshrrev_b64 v[12:13], v18, v[0:1] 2416; GFX9-NEXT: v_lshlrev_b64 v[14:15], v14, v[2:3] 2417; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v18 2418; GFX9-NEXT: v_or_b32_e32 v14, v12, v14 2419; GFX9-NEXT: v_subrev_u32_e32 v12, 64, v18 2420; GFX9-NEXT: v_or_b32_e32 v15, v13, v15 2421; GFX9-NEXT: v_lshrrev_b64 v[12:13], v12, v[2:3] 2422; GFX9-NEXT: v_lshrrev_b64 v[2:3], v18, v[2:3] 2423; GFX9-NEXT: v_cndmask_b32_e32 v13, v13, v15, vcc 2424; GFX9-NEXT: v_cndmask_b32_e32 v12, v12, v14, vcc 2425; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc 2426; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc 2427; GFX9-NEXT: v_add_co_u32_e32 v22, vcc, -1, v4 2428; GFX9-NEXT: v_addc_co_u32_e32 v23, vcc, -1, v5, vcc 2429; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v18 2430; GFX9-NEXT: v_addc_co_u32_e32 v24, vcc, -1, v6, vcc 2431; GFX9-NEXT: v_mov_b32_e32 v16, 0 2432; GFX9-NEXT: v_mov_b32_e32 v14, 0 2433; GFX9-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[4:5] 2434; GFX9-NEXT: v_cndmask_b32_e64 v0, v12, v0, s[4:5] 2435; GFX9-NEXT: v_addc_co_u32_e32 v25, vcc, -1, v7, vcc 2436; GFX9-NEXT: s_mov_b64 s[4:5], 0 2437; GFX9-NEXT: v_mov_b32_e32 v17, 0 2438; GFX9-NEXT: v_mov_b32_e32 v15, 0 2439; GFX9-NEXT: v_mov_b32_e32 v13, 0 2440; GFX9-NEXT: .LBB1_3: ; %udiv-do-while 2441; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 2442; GFX9-NEXT: v_lshrrev_b32_e32 v12, 31, v11 2443; GFX9-NEXT: v_lshlrev_b64 v[10:11], 1, v[10:11] 2444; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 2445; GFX9-NEXT: v_or_b32_e32 v10, v16, v10 2446; GFX9-NEXT: v_lshrrev_b32_e32 v16, 31, v1 2447; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 2448; GFX9-NEXT: v_or_b32_e32 v2, v2, v16 2449; 
GFX9-NEXT: v_lshrrev_b32_e32 v16, 31, v9 2450; GFX9-NEXT: v_or_b32_e32 v0, v0, v16 2451; GFX9-NEXT: v_sub_co_u32_e32 v16, vcc, v22, v0 2452; GFX9-NEXT: v_subb_co_u32_e32 v16, vcc, v23, v1, vcc 2453; GFX9-NEXT: v_subb_co_u32_e32 v16, vcc, v24, v2, vcc 2454; GFX9-NEXT: v_subb_co_u32_e32 v16, vcc, v25, v3, vcc 2455; GFX9-NEXT: v_ashrrev_i32_e32 v26, 31, v16 2456; GFX9-NEXT: v_and_b32_e32 v16, v26, v4 2457; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v16 2458; GFX9-NEXT: v_and_b32_e32 v16, v26, v5 2459; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v16, vcc 2460; GFX9-NEXT: v_and_b32_e32 v16, v26, v6 2461; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v16, vcc 2462; GFX9-NEXT: v_and_b32_e32 v16, v26, v7 2463; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v16, vcc 2464; GFX9-NEXT: v_add_co_u32_e32 v18, vcc, -1, v18 2465; GFX9-NEXT: v_addc_co_u32_e32 v19, vcc, -1, v19, vcc 2466; GFX9-NEXT: v_addc_co_u32_e32 v20, vcc, -1, v20, vcc 2467; GFX9-NEXT: v_addc_co_u32_e32 v21, vcc, -1, v21, vcc 2468; GFX9-NEXT: v_or_b32_e32 v11, v17, v11 2469; GFX9-NEXT: v_lshlrev_b64 v[8:9], 1, v[8:9] 2470; GFX9-NEXT: v_or_b32_e32 v16, v18, v20 2471; GFX9-NEXT: v_or_b32_e32 v17, v19, v21 2472; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17] 2473; GFX9-NEXT: v_or3_b32 v8, v8, v12, v14 2474; GFX9-NEXT: v_and_b32_e32 v12, 1, v26 2475; GFX9-NEXT: v_mov_b32_e32 v17, v13 2476; GFX9-NEXT: v_or3_b32 v9, v9, 0, v15 2477; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2478; GFX9-NEXT: v_mov_b32_e32 v16, v12 2479; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 2480; GFX9-NEXT: s_cbranch_execnz .LBB1_3 2481; GFX9-NEXT: ; %bb.4: ; %Flow 2482; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 2483; GFX9-NEXT: .LBB1_5: ; %Flow2 2484; GFX9-NEXT: s_or_b64 exec, exec, s[6:7] 2485; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[10:11] 2486; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[8:9] 2487; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v11 2488; GFX9-NEXT: v_or3_b32 v8, v3, 0, v15 2489; GFX9-NEXT: v_or3_b32 v9, v2, v4, v14 2490; GFX9-NEXT: v_or_b32_e32 v10, v13, v1 2491; 
GFX9-NEXT: v_or_b32_e32 v11, v12, v0 2492; GFX9-NEXT: .LBB1_6: ; %Flow3 2493; GFX9-NEXT: s_or_b64 exec, exec, s[8:9] 2494; GFX9-NEXT: v_mov_b32_e32 v0, v11 2495; GFX9-NEXT: v_mov_b32_e32 v1, v10 2496; GFX9-NEXT: v_mov_b32_e32 v2, v9 2497; GFX9-NEXT: v_mov_b32_e32 v3, v8 2498; GFX9-NEXT: s_setpc_b64 s[30:31] 2499; 2500; GFX9-O0-LABEL: v_udiv_i128_vv: 2501; GFX9-O0: ; %bb.0: ; %_udiv-special-cases 2502; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2503; GFX9-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 2504; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill 2505; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] 2506; GFX9-O0-NEXT: v_mov_b32_e32 v10, v6 2507; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill 2508; GFX9-O0-NEXT: v_mov_b32_e32 v12, v2 2509; GFX9-O0-NEXT: v_mov_b32_e32 v8, v0 2510; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload 2511; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2512; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2513; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 2514; GFX9-O0-NEXT: v_mov_b32_e32 v9, v1 2515; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2516; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2517; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec 2518; GFX9-O0-NEXT: v_mov_b32_e32 v13, v3 2519; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2520; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2521; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 2522; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5 2523; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2524; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2525; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec 2526; GFX9-O0-NEXT: v_mov_b32_e32 v11, v7 2527; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 2528; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 2529; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 2530; GFX9-O0-NEXT: ; implicit-def: 
$sgpr4_sgpr5 2531; GFX9-O0-NEXT: v_mov_b32_e32 v2, v10 2532; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11 2533; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill 2534; GFX9-O0-NEXT: s_nop 0 2535; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill 2536; GFX9-O0-NEXT: s_waitcnt vmcnt(2) 2537; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 2538; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 2539; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill 2540; GFX9-O0-NEXT: s_nop 0 2541; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill 2542; GFX9-O0-NEXT: v_mov_b32_e32 v2, v12 2543; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13 2544; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill 2545; GFX9-O0-NEXT: s_nop 0 2546; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill 2547; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8 2548; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9 2549; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill 2550; GFX9-O0-NEXT: s_nop 0 2551; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill 2552; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11 2553; GFX9-O0-NEXT: v_mov_b32_e32 v7, v1 2554; GFX9-O0-NEXT: v_or_b32_e64 v2, v7, v6 2555; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 2556; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0 2557; GFX9-O0-NEXT: v_or_b32_e64 v0, v4, v5 2558; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 2559; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 2560; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 2561; GFX9-O0-NEXT: ; implicit-def: $vgpr30 : SGPR spill to VGPR lane 2562; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 0 2563; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 1 2564; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[0:1], s[6:7] 2565; GFX9-O0-NEXT: v_mov_b32_e32 v1, v13 2566; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9 2567; GFX9-O0-NEXT: v_or_b32_e64 v14, v3, 
v1 2568; GFX9-O0-NEXT: v_mov_b32_e32 v0, v12 2569; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8 2570; GFX9-O0-NEXT: v_or_b32_e64 v8, v2, v0 2571; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 2572; GFX9-O0-NEXT: v_mov_b32_e32 v9, v14 2573; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7] 2574; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] 2575; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v5 2576; GFX9-O0-NEXT: s_mov_b32 s9, 32 2577; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s9 2578; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v6 2579; GFX9-O0-NEXT: v_min_u32_e64 v5, v5, v6 2580; GFX9-O0-NEXT: s_mov_b32 s8, 0 2581; GFX9-O0-NEXT: ; implicit-def: $sgpr10 2582; GFX9-O0-NEXT: v_mov_b32_e32 v8, s8 2583; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec 2584; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8 2585; GFX9-O0-NEXT: v_mov_b32_e32 v9, v6 2586; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v4 2587; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s9 2588; GFX9-O0-NEXT: v_ffbh_u32_e64 v7, v7 2589; GFX9-O0-NEXT: v_min_u32_e64 v14, v4, v7 2590; GFX9-O0-NEXT: ; implicit-def: $sgpr10 2591; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 2592; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec 2593; GFX9-O0-NEXT: v_mov_b32_e32 v15, v4 2594; GFX9-O0-NEXT: s_mov_b64 s[10:11], 64 2595; GFX9-O0-NEXT: v_mov_b32_e32 v7, v14 2596; GFX9-O0-NEXT: s_mov_b32 s12, s10 2597; GFX9-O0-NEXT: v_mov_b32_e32 v4, v15 2598; GFX9-O0-NEXT: s_mov_b32 s14, s11 2599; GFX9-O0-NEXT: v_add_co_u32_e64 v7, s[12:13], v7, s12 2600; GFX9-O0-NEXT: v_mov_b32_e32 v8, s14 2601; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[12:13], v4, v8, s[12:13] 2602; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec 2603; GFX9-O0-NEXT: v_mov_b32_e32 v8, v4 2604; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 2605; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[6:7] 2606; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[10:11], s[12:13] 2607; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v9, s[12:13] 2608; GFX9-O0-NEXT: v_mov_b32_e32 
v6, v5 2609; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 2610; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v5, v6, s[12:13] 2611; GFX9-O0-NEXT: ; implicit-def: $sgpr12 2612; GFX9-O0-NEXT: ; implicit-def: $sgpr12 2613; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 2614; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4 2615; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v0 2616; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s9 2617; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v1 2618; GFX9-O0-NEXT: v_min_u32_e64 v5, v4, v5 2619; GFX9-O0-NEXT: ; implicit-def: $sgpr12 2620; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 2621; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec 2622; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 2623; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 2624; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v2 2625; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s9 2626; GFX9-O0-NEXT: v_ffbh_u32_e64 v10, v3 2627; GFX9-O0-NEXT: v_min_u32_e64 v14, v4, v10 2628; GFX9-O0-NEXT: ; implicit-def: $sgpr9 2629; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 2630; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec 2631; GFX9-O0-NEXT: v_mov_b32_e32 v15, v4 2632; GFX9-O0-NEXT: v_mov_b32_e32 v10, v14 2633; GFX9-O0-NEXT: s_mov_b32 s8, s10 2634; GFX9-O0-NEXT: v_mov_b32_e32 v4, v15 2635; GFX9-O0-NEXT: s_mov_b32 s10, s11 2636; GFX9-O0-NEXT: v_add_co_u32_e64 v10, s[8:9], v10, s8 2637; GFX9-O0-NEXT: v_mov_b32_e32 v11, s10 2638; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[8:9], v4, v11, s[8:9] 2639; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec 2640; GFX9-O0-NEXT: v_mov_b32_e32 v11, v4 2641; GFX9-O0-NEXT: v_mov_b32_e32 v4, v11 2642; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] 2643; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[12:13], s[8:9] 2644; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[8:9] 2645; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 2646; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 2647; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[8:9] 2648; GFX9-O0-NEXT: ; implicit-def: $sgpr8 2649; GFX9-O0-NEXT: 
; implicit-def: $sgpr8 2650; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec 2651; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 2652; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 2653; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 2654; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec 2655; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 2656; GFX9-O0-NEXT: s_mov_b32 s10, s6 2657; GFX9-O0-NEXT: s_mov_b32 s11, s7 2658; GFX9-O0-NEXT: v_sub_co_u32_e32 v4, vcc, v4, v7 2659; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v5, v6, vcc 2660; GFX9-O0-NEXT: v_mov_b32_e32 v6, s10 2661; GFX9-O0-NEXT: v_mov_b32_e32 v5, s10 2662; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v6, vcc 2663; GFX9-O0-NEXT: v_mov_b32_e32 v6, s11 2664; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11 2665; GFX9-O0-NEXT: v_subb_co_u32_e32 v6, vcc, v5, v6, vcc 2666; GFX9-O0-NEXT: ; implicit-def: $sgpr8 2667; GFX9-O0-NEXT: ; implicit-def: $sgpr8 2668; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 2669; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8 2670; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill 2671; GFX9-O0-NEXT: s_nop 0 2672; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill 2673; GFX9-O0-NEXT: ; implicit-def: $sgpr8 2674; GFX9-O0-NEXT: ; implicit-def: $sgpr8 2675; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec 2676; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6 2677; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill 2678; GFX9-O0-NEXT: s_nop 0 2679; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill 2680; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7] 2681; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f 2682; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13] 2683; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15] 2684; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7] 2685; GFX9-O0-NEXT: v_cndmask_b32_e64 
v6, 0, 1, s[14:15] 2686; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9] 2687; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6 2688; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v6, 1 2689; GFX9-O0-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9] 2690; GFX9-O0-NEXT: s_mov_b64 s[4:5], -1 2691; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5] 2692; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 2693; GFX9-O0-NEXT: s_mov_b32 s14, s13 2694; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14 2695; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13 2696; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12 2697; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 2698; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6 2699; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 2700; GFX9-O0-NEXT: v_mov_b32_e32 v9, v8 2701; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v9 2702; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec 2703; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 2704; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 2705; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 2706; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6 2707; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7] 2708; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11 2709; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[8:9] 2710; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10 2711; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[8:9] 2712; GFX9-O0-NEXT: ; implicit-def: $sgpr12 2713; GFX9-O0-NEXT: ; implicit-def: $sgpr12 2714; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 2715; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4 2716; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11 2717; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[8:9] 2718; GFX9-O0-NEXT: v_mov_b32_e32 v3, s10 2719; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[8:9] 2720; GFX9-O0-NEXT: ; implicit-def: $sgpr8 2721; GFX9-O0-NEXT: ; implicit-def: $sgpr8 2722; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2723; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4 2724; 
GFX9-O0-NEXT: s_and_b64 s[6:7], s[4:5], s[6:7] 2725; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill 2726; GFX9-O0-NEXT: s_nop 0 2727; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill 2728; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill 2729; GFX9-O0-NEXT: s_nop 0 2730; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill 2731; GFX9-O0-NEXT: s_mov_b64 s[4:5], exec 2732; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 2 2733; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 3 2734; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 2735; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill 2736; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] 2737; GFX9-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] 2738; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] 2739; GFX9-O0-NEXT: s_cbranch_execz .LBB1_3 2740; GFX9-O0-NEXT: s_branch .LBB1_8 2741; GFX9-O0-NEXT: .LBB1_1: ; %Flow 2742; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 2743; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload 2744; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] 2745; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 2746; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 4 2747; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 5 2748; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] 2749; GFX9-O0-NEXT: ; %bb.2: ; %Flow 2750; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload 2751; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload 2752; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload 2753; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload 2754; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload 2755; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload 2756; GFX9-O0-NEXT: buffer_load_dword v0, off, 
s[0:3], s32 offset:128 ; 4-byte Folded Reload 2757; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload 2758; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 2759; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill 2760; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 2761; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill 2762; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 2763; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill 2764; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 2765; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill 2766; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 2767; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill 2768; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 2769; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill 2770; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 2771; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill 2772; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 2773; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill 2774; GFX9-O0-NEXT: s_branch .LBB1_5 2775; GFX9-O0-NEXT: .LBB1_3: ; %Flow2 2776; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 2777; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload 2778; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] 2779; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 2780; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 2 2781; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 3 2782; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] 2783; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload 2784; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload 2785; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload 2786; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload 2787; 
GFX9-O0-NEXT: s_waitcnt vmcnt(1) 2788; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill 2789; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 2790; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill 2791; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill 2792; GFX9-O0-NEXT: s_nop 0 2793; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill 2794; GFX9-O0-NEXT: s_branch .LBB1_9 2795; GFX9-O0-NEXT: .LBB1_4: ; %udiv-loop-exit 2796; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload 2797; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload 2798; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload 2799; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload 2800; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload 2801; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload 2802; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload 2803; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload 2804; GFX9-O0-NEXT: s_mov_b32 s4, 1 2805; GFX9-O0-NEXT: s_waitcnt vmcnt(2) 2806; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s4, v[0:1] 2807; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 2808; GFX9-O0-NEXT: v_lshlrev_b64 v[9:10], s4, v[9:10] 2809; GFX9-O0-NEXT: s_mov_b32 s4, 63 2810; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] 2811; GFX9-O0-NEXT: v_mov_b32_e32 v11, v1 2812; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10 2813; GFX9-O0-NEXT: v_mov_b32_e32 v12, v8 2814; GFX9-O0-NEXT: v_or3_b32 v4, v4, v11, v12 2815; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 2816; GFX9-O0-NEXT: v_mov_b32_e32 v0, v9 2817; GFX9-O0-NEXT: v_or3_b32 v0, v0, v1, v7 2818; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 
2819; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4 2820; GFX9-O0-NEXT: v_mov_b32_e32 v7, v3 2821; GFX9-O0-NEXT: v_mov_b32_e32 v4, v6 2822; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v7 2823; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 2824; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 2825; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 2826; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2827; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4 2828; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill 2829; GFX9-O0-NEXT: s_nop 0 2830; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill 2831; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill 2832; GFX9-O0-NEXT: s_nop 0 2833; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill 2834; GFX9-O0-NEXT: s_branch .LBB1_3 2835; GFX9-O0-NEXT: .LBB1_5: ; %Flow1 2836; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 2837; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload 2838; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] 2839; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 2840; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 6 2841; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 7 2842; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] 2843; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload 2844; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload 2845; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload 2846; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload 2847; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload 2848; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload 2849; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload 2850; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:76 ; 4-byte 
Folded Reload 2851; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 2852; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill 2853; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 2854; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill 2855; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill 2856; GFX9-O0-NEXT: s_nop 0 2857; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill 2858; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill 2859; GFX9-O0-NEXT: s_nop 0 2860; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill 2861; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill 2862; GFX9-O0-NEXT: s_nop 0 2863; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill 2864; GFX9-O0-NEXT: s_branch .LBB1_4 2865; GFX9-O0-NEXT: .LBB1_6: ; %udiv-do-while 2866; GFX9-O0-NEXT: ; =>This Inner Loop Header: Depth=1 2867; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 2868; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload 2869; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] 2870; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 2871; GFX9-O0-NEXT: v_readlane_b32 s6, v30, 8 2872; GFX9-O0-NEXT: v_readlane_b32 s7, v30, 9 2873; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload 2874; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload 2875; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload 2876; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload 2877; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload 2878; GFX9-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload 2879; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:208 
; 4-byte Folded Reload 2880; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload 2881; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload 2882; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload 2883; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload 2884; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload 2885; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload 2886; GFX9-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload 2887; GFX9-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload 2888; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload 2889; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload 2890; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload 2891; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload 2892; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload 2893; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload 2894; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload 2895; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload 2896; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload 2897; GFX9-O0-NEXT: s_mov_b32 s4, 63 2898; GFX9-O0-NEXT: s_waitcnt vmcnt(16) 2899; GFX9-O0-NEXT: v_lshrrev_b64 v[28:29], s4, v[2:3] 2900; GFX9-O0-NEXT: v_mov_b32_e32 v5, v29 2901; GFX9-O0-NEXT: s_mov_b32 s5, 1 2902; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], s5, v[22:23] 2903; GFX9-O0-NEXT: v_mov_b32_e32 v4, v23 2904; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 2905; GFX9-O0-NEXT: 
v_mov_b32_e32 v10, v28 2906; GFX9-O0-NEXT: v_mov_b32_e32 v5, v22 2907; GFX9-O0-NEXT: v_or_b32_e64 v22, v5, v10 2908; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec 2909; GFX9-O0-NEXT: v_mov_b32_e32 v23, v4 2910; GFX9-O0-NEXT: v_lshlrev_b64 v[28:29], s5, v[2:3] 2911; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], s4, v[6:7] 2912; GFX9-O0-NEXT: v_mov_b32_e32 v2, v29 2913; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5 2914; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 2915; GFX9-O0-NEXT: v_mov_b32_e32 v3, v28 2916; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec 2917; GFX9-O0-NEXT: v_or_b32_e64 v4, v3, v4 2918; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 2919; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2 2920; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s5, v[0:1] 2921; GFX9-O0-NEXT: v_lshlrev_b64 v[28:29], s5, v[6:7] 2922; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] 2923; GFX9-O0-NEXT: v_mov_b32_e32 v7, v1 2924; GFX9-O0-NEXT: v_mov_b32_e32 v6, v29 2925; GFX9-O0-NEXT: s_waitcnt vmcnt(10) 2926; GFX9-O0-NEXT: v_mov_b32_e32 v10, v27 2927; GFX9-O0-NEXT: v_or3_b32 v6, v6, v7, v10 2928; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 2929; GFX9-O0-NEXT: v_mov_b32_e32 v0, v28 2930; GFX9-O0-NEXT: v_mov_b32_e32 v7, v26 2931; GFX9-O0-NEXT: v_or3_b32 v0, v0, v1, v7 2932; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 2933; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6 2934; GFX9-O0-NEXT: v_mov_b32_e32 v7, v3 2935; GFX9-O0-NEXT: s_waitcnt vmcnt(8) 2936; GFX9-O0-NEXT: v_mov_b32_e32 v6, v25 2937; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7 2938; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 2939; GFX9-O0-NEXT: v_mov_b32_e32 v2, v24 2940; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 2941; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2942; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6 2943; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 2944; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5 2945; GFX9-O0-NEXT: v_mov_b32_e32 v4, v22 2946; GFX9-O0-NEXT: 
v_mov_b32_e32 v5, v23 2947; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 2948; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11 2949; GFX9-O0-NEXT: v_mov_b32_e32 v11, v14 2950; GFX9-O0-NEXT: v_mov_b32_e32 v7, v15 2951; GFX9-O0-NEXT: v_sub_co_u32_e32 v13, vcc, v13, v6 2952; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 2953; GFX9-O0-NEXT: v_subb_co_u32_e32 v12, vcc, v12, v10, vcc 2954; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v11, v4, vcc 2955; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v5, vcc 2956; GFX9-O0-NEXT: ; implicit-def: $sgpr5 2957; GFX9-O0-NEXT: ; implicit-def: $sgpr5 2958; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec 2959; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7 2960; GFX9-O0-NEXT: v_ashrrev_i64 v[13:14], s4, v[11:12] 2961; GFX9-O0-NEXT: v_mov_b32_e32 v7, v14 2962; GFX9-O0-NEXT: s_mov_b64 s[4:5], 1 2963; GFX9-O0-NEXT: s_mov_b32 s8, s5 2964; GFX9-O0-NEXT: v_and_b32_e64 v12, v7, s8 2965; GFX9-O0-NEXT: v_mov_b32_e32 v11, v13 2966; GFX9-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 2967; GFX9-O0-NEXT: v_and_b32_e64 v14, v11, s4 2968; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec 2969; GFX9-O0-NEXT: v_mov_b32_e32 v15, v12 2970; GFX9-O0-NEXT: v_mov_b32_e32 v12, 0 2971; GFX9-O0-NEXT: v_mov_b32_e32 v13, 0 2972; GFX9-O0-NEXT: v_mov_b32_e32 v22, v21 2973; GFX9-O0-NEXT: v_and_b32_e64 v22, v7, v22 2974; GFX9-O0-NEXT: v_and_b32_e64 v20, v11, v20 2975; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec 2976; GFX9-O0-NEXT: v_mov_b32_e32 v21, v22 2977; GFX9-O0-NEXT: v_mov_b32_e32 v22, v19 2978; GFX9-O0-NEXT: v_and_b32_e64 v7, v7, v22 2979; GFX9-O0-NEXT: v_and_b32_e64 v22, v11, v18 2980; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec 2981; GFX9-O0-NEXT: v_mov_b32_e32 v23, v7 2982; GFX9-O0-NEXT: v_mov_b32_e32 v19, v22 2983; GFX9-O0-NEXT: v_mov_b32_e32 v18, v23 2984; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20 2985; GFX9-O0-NEXT: v_mov_b32_e32 v7, v21 2986; 
GFX9-O0-NEXT: v_sub_co_u32_e32 v6, vcc, v6, v19 2987; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v18, vcc 2988; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v4, v11, vcc 2989; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v7, vcc 2990; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2991; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2992; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 2993; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 2994; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2995; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2996; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec 2997; GFX9-O0-NEXT: v_mov_b32_e32 v7, v10 2998; GFX9-O0-NEXT: v_mov_b32_e32 v11, v8 2999; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 killed $vgpr8_vgpr9 killed $exec 3000; GFX9-O0-NEXT: s_mov_b64 s[8:9], -1 3001; GFX9-O0-NEXT: s_mov_b32 s5, s8 3002; GFX9-O0-NEXT: s_mov_b32 s4, s9 3003; GFX9-O0-NEXT: v_mov_b32_e32 v10, v16 3004; GFX9-O0-NEXT: v_mov_b32_e32 v8, v17 3005; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5 3006; GFX9-O0-NEXT: v_add_co_u32_e32 v19, vcc, v11, v16 3007; GFX9-O0-NEXT: v_mov_b32_e32 v11, s4 3008; GFX9-O0-NEXT: v_addc_co_u32_e32 v9, vcc, v9, v11, vcc 3009; GFX9-O0-NEXT: v_mov_b32_e32 v11, s5 3010; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v10, v11, vcc 3011; GFX9-O0-NEXT: v_mov_b32_e32 v10, s4 3012; GFX9-O0-NEXT: v_addc_co_u32_e32 v8, vcc, v8, v10, vcc 3013; GFX9-O0-NEXT: ; implicit-def: $sgpr4 3014; GFX9-O0-NEXT: ; implicit-def: $sgpr4 3015; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec 3016; GFX9-O0-NEXT: v_mov_b32_e32 v20, v9 3017; GFX9-O0-NEXT: ; implicit-def: $sgpr4 3018; GFX9-O0-NEXT: ; implicit-def: $sgpr4 3019; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec 3020; GFX9-O0-NEXT: v_mov_b32_e32 v17, v8 3021; GFX9-O0-NEXT: v_mov_b32_e32 v8, v16 3022; GFX9-O0-NEXT: v_mov_b32_e32 v9, v17 3023; GFX9-O0-NEXT: v_mov_b32_e32 v10, v19 3024; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20 3025; 
GFX9-O0-NEXT: v_mov_b32_e32 v21, v17 3026; GFX9-O0-NEXT: v_mov_b32_e32 v18, v20 3027; GFX9-O0-NEXT: v_or_b32_e64 v18, v18, v21 3028; GFX9-O0-NEXT: v_mov_b32_e32 v17, v16 3029; GFX9-O0-NEXT: v_mov_b32_e32 v16, v19 3030; GFX9-O0-NEXT: v_or_b32_e64 v16, v16, v17 3031; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec 3032; GFX9-O0-NEXT: v_mov_b32_e32 v17, v18 3033; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[16:17], v[12:13] 3034; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] 3035; GFX9-O0-NEXT: v_mov_b32_e32 v17, v3 3036; GFX9-O0-NEXT: v_mov_b32_e32 v16, v2 3037; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill 3038; GFX9-O0-NEXT: s_nop 0 3039; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill 3040; GFX9-O0-NEXT: v_mov_b32_e32 v17, v1 3041; GFX9-O0-NEXT: v_mov_b32_e32 v16, v0 3042; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill 3043; GFX9-O0-NEXT: s_nop 0 3044; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill 3045; GFX9-O0-NEXT: v_mov_b32_e32 v17, v15 3046; GFX9-O0-NEXT: v_mov_b32_e32 v16, v14 3047; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill 3048; GFX9-O0-NEXT: s_nop 0 3049; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill 3050; GFX9-O0-NEXT: v_mov_b32_e32 v17, v13 3051; GFX9-O0-NEXT: v_mov_b32_e32 v16, v12 3052; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill 3053; GFX9-O0-NEXT: s_nop 0 3054; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill 3055; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5] 3056; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 4 3057; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 5 3058; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5] 3059; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 8 3060; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 9 3061; 
GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 3062; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill 3063; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] 3064; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill 3065; GFX9-O0-NEXT: s_nop 0 3066; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill 3067; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill 3068; GFX9-O0-NEXT: s_nop 0 3069; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill 3070; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill 3071; GFX9-O0-NEXT: s_nop 0 3072; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill 3073; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill 3074; GFX9-O0-NEXT: s_nop 0 3075; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill 3076; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill 3077; GFX9-O0-NEXT: s_nop 0 3078; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill 3079; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill 3080; GFX9-O0-NEXT: s_nop 0 3081; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill 3082; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill 3083; GFX9-O0-NEXT: s_nop 0 3084; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill 3085; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill 3086; GFX9-O0-NEXT: s_nop 0 3087; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill 3088; GFX9-O0-NEXT: s_andn2_b64 exec, exec, s[4:5] 3089; GFX9-O0-NEXT: s_cbranch_execnz .LBB1_6 3090; 
GFX9-O0-NEXT: s_branch .LBB1_1 3091; GFX9-O0-NEXT: .LBB1_7: ; %udiv-preheader 3092; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 3093; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload 3094; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] 3095; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload 3096; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload 3097; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload 3098; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload 3099; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload 3100; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload 3101; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload 3102; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload 3103; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload 3104; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload 3105; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload 3106; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload 3107; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload 3108; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload 3109; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload 3110; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload 3111; GFX9-O0-NEXT: s_waitcnt vmcnt(9) 3112; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10 3113; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 3114; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], v4, v[20:21] 3115; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 3116; GFX9-O0-NEXT: s_mov_b32 s6, 64 
3117; GFX9-O0-NEXT: v_sub_u32_e64 v12, s6, v4 3118; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], v12, v[18:19] 3119; GFX9-O0-NEXT: v_mov_b32_e32 v12, v23 3120; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v12 3121; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec 3122; GFX9-O0-NEXT: v_mov_b32_e32 v7, v22 3123; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7 3124; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec 3125; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 3126; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7 3127; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v4, s6 3128; GFX9-O0-NEXT: v_sub_u32_e64 v5, v4, s6 3129; GFX9-O0-NEXT: v_lshrrev_b64 v[22:23], v5, v[18:19] 3130; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 3131; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[4:5] 3132; GFX9-O0-NEXT: s_mov_b32 s6, 0 3133; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v4, s6 3134; GFX9-O0-NEXT: v_mov_b32_e32 v12, v21 3135; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[6:7] 3136; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 3137; GFX9-O0-NEXT: v_mov_b32_e32 v6, v22 3138; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5] 3139; GFX9-O0-NEXT: v_mov_b32_e32 v7, v20 3140; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[6:7] 3141; GFX9-O0-NEXT: ; implicit-def: $sgpr6 3142; GFX9-O0-NEXT: ; implicit-def: $sgpr6 3143; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec 3144; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 3145; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], v4, v[18:19] 3146; GFX9-O0-NEXT: v_mov_b32_e32 v15, v5 3147; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 3148; GFX9-O0-NEXT: s_mov_b32 s8, s7 3149; GFX9-O0-NEXT: v_mov_b32_e32 v12, s8 3150; GFX9-O0-NEXT: v_cndmask_b32_e64 v12, v12, v15, s[4:5] 3151; GFX9-O0-NEXT: v_mov_b32_e32 v5, v4 3152; GFX9-O0-NEXT: s_mov_b32 s8, s6 3153; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 3154; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5] 3155; GFX9-O0-NEXT: ; implicit-def: $sgpr4 3156; GFX9-O0-NEXT: ; implicit-def: $sgpr4 3157; GFX9-O0-NEXT: ; kill: def $vgpr4 
killed $vgpr4 def $vgpr4_vgpr5 killed $exec 3158; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12 3159; GFX9-O0-NEXT: v_mov_b32_e32 v12, v13 3160; GFX9-O0-NEXT: v_mov_b32_e32 v15, v14 3161; GFX9-O0-NEXT: s_mov_b64 s[8:9], -1 3162; GFX9-O0-NEXT: s_mov_b32 s5, s8 3163; GFX9-O0-NEXT: s_mov_b32 s4, s9 3164; GFX9-O0-NEXT: v_mov_b32_e32 v14, v16 3165; GFX9-O0-NEXT: v_mov_b32_e32 v13, v17 3166; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5 3167; GFX9-O0-NEXT: v_add_co_u32_e32 v12, vcc, v12, v16 3168; GFX9-O0-NEXT: v_mov_b32_e32 v16, s4 3169; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v15, v16, vcc 3170; GFX9-O0-NEXT: v_mov_b32_e32 v15, s5 3171; GFX9-O0-NEXT: v_addc_co_u32_e32 v14, vcc, v14, v15, vcc 3172; GFX9-O0-NEXT: v_mov_b32_e32 v15, s4 3173; GFX9-O0-NEXT: v_addc_co_u32_e32 v13, vcc, v13, v15, vcc 3174; GFX9-O0-NEXT: ; implicit-def: $sgpr4 3175; GFX9-O0-NEXT: ; implicit-def: $sgpr4 3176; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec 3177; GFX9-O0-NEXT: v_mov_b32_e32 v15, v13 3178; GFX9-O0-NEXT: ; implicit-def: $sgpr4 3179; GFX9-O0-NEXT: ; implicit-def: $sgpr4 3180; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec 3181; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16 3182; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] 3183; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill 3184; GFX9-O0-NEXT: s_nop 0 3185; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill 3186; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill 3187; GFX9-O0-NEXT: s_nop 0 3188; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill 3189; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[6:7] 3190; GFX9-O0-NEXT: v_mov_b32_e32 v15, s9 3191; GFX9-O0-NEXT: v_mov_b32_e32 v14, s8 3192; GFX9-O0-NEXT: v_mov_b32_e32 v13, s7 3193; GFX9-O0-NEXT: v_mov_b32_e32 v12, s6 3194; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 8 3195; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 9 
3196; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 3197; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill 3198; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] 3199; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill 3200; GFX9-O0-NEXT: s_nop 0 3201; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill 3202; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill 3203; GFX9-O0-NEXT: s_nop 0 3204; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill 3205; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill 3206; GFX9-O0-NEXT: s_nop 0 3207; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill 3208; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill 3209; GFX9-O0-NEXT: s_nop 0 3210; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill 3211; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill 3212; GFX9-O0-NEXT: s_nop 0 3213; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill 3214; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill 3215; GFX9-O0-NEXT: s_nop 0 3216; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill 3217; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill 3218; GFX9-O0-NEXT: s_nop 0 3219; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill 3220; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill 3221; GFX9-O0-NEXT: s_nop 0 3222; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill 3223; GFX9-O0-NEXT: s_branch .LBB1_6 3224; GFX9-O0-NEXT: .LBB1_8: ; %udiv-bb1 3225; GFX9-O0-NEXT: 
s_or_saveexec_b64 s[18:19], -1 3226; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload 3227; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] 3228; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload 3229; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload 3230; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload 3231; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload 3232; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload 3233; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload 3234; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload 3235; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload 3236; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1 3237; GFX9-O0-NEXT: s_mov_b32 s5, s6 3238; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 3239; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 3240; GFX9-O0-NEXT: s_mov_b32 s4, s7 3241; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 3242; GFX9-O0-NEXT: s_mov_b32 s8, s6 3243; GFX9-O0-NEXT: s_mov_b32 s9, s7 3244; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4 3245; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 3246; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5 3247; GFX9-O0-NEXT: v_add_co_u32_e32 v8, vcc, v3, v4 3248; GFX9-O0-NEXT: v_mov_b32_e32 v4, s4 3249; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 3250; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v4, vcc 3251; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 3252; GFX9-O0-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v4, vcc 3253; GFX9-O0-NEXT: v_mov_b32_e32 v4, s9 3254; GFX9-O0-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v4, vcc 3255; GFX9-O0-NEXT: ; implicit-def: $sgpr4 3256; GFX9-O0-NEXT: ; implicit-def: $sgpr4 3257; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 3258; GFX9-O0-NEXT: v_mov_b32_e32 v9, v1 3259; GFX9-O0-NEXT: ; implicit-def: $sgpr4 3260; GFX9-O0-NEXT: ; 
implicit-def: $sgpr4 3261; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3262; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 3263; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1 3264; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0 3265; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill 3266; GFX9-O0-NEXT: s_nop 0 3267; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill 3268; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 3269; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 3270; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill 3271; GFX9-O0-NEXT: s_nop 0 3272; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill 3273; GFX9-O0-NEXT: s_mov_b32 s4, 0x7f 3274; GFX9-O0-NEXT: v_sub_u32_e64 v2, s4, v3 3275; GFX9-O0-NEXT: v_lshlrev_b64 v[4:5], v2, v[10:11] 3276; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5 3277; GFX9-O0-NEXT: s_mov_b32 s4, 64 3278; GFX9-O0-NEXT: v_sub_u32_e64 v13, s4, v2 3279; GFX9-O0-NEXT: v_lshrrev_b64 v[13:14], v13, v[6:7] 3280; GFX9-O0-NEXT: v_mov_b32_e32 v15, v14 3281; GFX9-O0-NEXT: v_or_b32_e64 v12, v12, v15 3282; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec 3283; GFX9-O0-NEXT: v_mov_b32_e32 v5, v13 3284; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 3285; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 3286; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12 3287; GFX9-O0-NEXT: v_mov_b32_e32 v14, v5 3288; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v2, s4 3289; GFX9-O0-NEXT: s_mov_b32 s10, 63 3290; GFX9-O0-NEXT: v_sub_u32_e64 v3, s10, v3 3291; GFX9-O0-NEXT: v_lshlrev_b64 v[12:13], v3, v[6:7] 3292; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13 3293; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[4:5] 3294; GFX9-O0-NEXT: s_mov_b32 s10, 0 3295; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[10:11], v2, s10 3296; GFX9-O0-NEXT: v_mov_b32_e32 v14, v11 3297; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[10:11] 3298; GFX9-O0-NEXT: 
v_mov_b32_e32 v5, v4 3299; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12 3300; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5] 3301; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 3302; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[10:11] 3303; GFX9-O0-NEXT: ; implicit-def: $sgpr10 3304; GFX9-O0-NEXT: ; implicit-def: $sgpr10 3305; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 3306; GFX9-O0-NEXT: v_mov_b32_e32 v5, v3 3307; GFX9-O0-NEXT: v_lshlrev_b64 v[6:7], v2, v[6:7] 3308; GFX9-O0-NEXT: v_mov_b32_e32 v3, v7 3309; GFX9-O0-NEXT: v_mov_b32_e32 v2, s9 3310; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5] 3311; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec 3312; GFX9-O0-NEXT: v_mov_b32_e32 v3, s8 3313; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v3, v6, s[4:5] 3314; GFX9-O0-NEXT: ; implicit-def: $sgpr4 3315; GFX9-O0-NEXT: ; implicit-def: $sgpr4 3316; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec 3317; GFX9-O0-NEXT: v_mov_b32_e32 v7, v2 3318; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill 3319; GFX9-O0-NEXT: s_nop 0 3320; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill 3321; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill 3322; GFX9-O0-NEXT: s_nop 0 3323; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill 3324; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 3325; GFX9-O0-NEXT: v_mov_b32_e32 v2, v9 3326; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 3327; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 3328; GFX9-O0-NEXT: v_mov_b32_e32 v0, v8 3329; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v1 3330; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3331; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 3332; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[0:1], s[6:7] 3333; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] 3334; GFX9-O0-NEXT: v_mov_b32_e32 v2, s8 3335; GFX9-O0-NEXT: 
v_mov_b32_e32 v3, s9 3336; GFX9-O0-NEXT: v_mov_b32_e32 v0, s6 3337; GFX9-O0-NEXT: v_mov_b32_e32 v1, s7 3338; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill 3339; GFX9-O0-NEXT: s_nop 0 3340; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill 3341; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill 3342; GFX9-O0-NEXT: s_nop 0 3343; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill 3344; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill 3345; GFX9-O0-NEXT: s_nop 0 3346; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill 3347; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill 3348; GFX9-O0-NEXT: s_nop 0 3349; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill 3350; GFX9-O0-NEXT: s_mov_b64 s[6:7], exec 3351; GFX9-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] 3352; GFX9-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] 3353; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 6 3354; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 7 3355; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 3356; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill 3357; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] 3358; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] 3359; GFX9-O0-NEXT: s_cbranch_execz .LBB1_5 3360; GFX9-O0-NEXT: s_branch .LBB1_7 3361; GFX9-O0-NEXT: .LBB1_9: ; %udiv-end 3362; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload 3363; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload 3364; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload 3365; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload 3366; GFX9-O0-NEXT: s_mov_b32 s4, 32 3367; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 3368; 
GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[6:7] 3369; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 3370; GFX9-O0-NEXT: v_lshrrev_b64 v[2:3], s4, v[4:5] 3371; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 3372; GFX9-O0-NEXT: v_mov_b32_e32 v0, v6 3373; GFX9-O0-NEXT: v_mov_b32_e32 v2, v4 3374; GFX9-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 3375; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload 3376; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] 3377; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 3378; GFX9-O0-NEXT: s_setpc_b64 s[30:31] 3379; 3380; GFX9-G-LABEL: v_udiv_i128_vv: 3381; GFX9-G: ; %bb.0: ; %_udiv-special-cases 3382; GFX9-G-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3383; GFX9-G-NEXT: v_or_b32_e32 v8, v4, v6 3384; GFX9-G-NEXT: v_or_b32_e32 v9, v5, v7 3385; GFX9-G-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] 3386; GFX9-G-NEXT: v_or_b32_e32 v8, v0, v2 3387; GFX9-G-NEXT: v_or_b32_e32 v9, v1, v3 3388; GFX9-G-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[8:9] 3389; GFX9-G-NEXT: v_ffbh_u32_e32 v9, v4 3390; GFX9-G-NEXT: v_ffbh_u32_e32 v8, v5 3391; GFX9-G-NEXT: v_add_u32_e32 v9, 32, v9 3392; GFX9-G-NEXT: v_ffbh_u32_e32 v10, v6 3393; GFX9-G-NEXT: v_min_u32_e32 v8, v8, v9 3394; GFX9-G-NEXT: v_ffbh_u32_e32 v9, v7 3395; GFX9-G-NEXT: v_add_u32_e32 v10, 32, v10 3396; GFX9-G-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[6:7] 3397; GFX9-G-NEXT: v_add_u32_e32 v8, 64, v8 3398; GFX9-G-NEXT: v_min_u32_e32 v9, v9, v10 3399; GFX9-G-NEXT: v_ffbh_u32_e32 v10, v0 3400; GFX9-G-NEXT: v_cndmask_b32_e64 v8, v9, v8, s[6:7] 3401; GFX9-G-NEXT: v_ffbh_u32_e32 v9, v1 3402; GFX9-G-NEXT: v_add_u32_e32 v10, 32, v10 3403; GFX9-G-NEXT: v_ffbh_u32_e32 v11, v2 3404; GFX9-G-NEXT: v_min_u32_e32 v9, v9, v10 3405; GFX9-G-NEXT: v_ffbh_u32_e32 v10, v3 3406; GFX9-G-NEXT: v_add_u32_e32 v11, 32, v11 3407; GFX9-G-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[2:3] 3408; GFX9-G-NEXT: v_add_u32_e32 v9, 64, v9 3409; GFX9-G-NEXT: v_min_u32_e32 v10, v10, v11 3410; GFX9-G-NEXT: v_cndmask_b32_e64 v9, v10, v9, s[6:7] 3411; GFX9-G-NEXT: v_sub_co_u32_e64 v12, 
s[6:7], v8, v9 3412; GFX9-G-NEXT: v_subb_co_u32_e64 v13, s[6:7], 0, 0, s[6:7] 3413; GFX9-G-NEXT: v_mov_b32_e32 v8, 0x7f 3414; GFX9-G-NEXT: v_subb_co_u32_e64 v14, s[6:7], 0, 0, s[6:7] 3415; GFX9-G-NEXT: v_mov_b32_e32 v9, 0 3416; GFX9-G-NEXT: v_subb_co_u32_e64 v15, s[6:7], 0, 0, s[6:7] 3417; GFX9-G-NEXT: v_cmp_gt_u64_e64 s[6:7], v[12:13], v[8:9] 3418; GFX9-G-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 3419; GFX9-G-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[6:7] 3420; GFX9-G-NEXT: v_cmp_lt_u64_e64 s[6:7], 0, v[14:15] 3421; GFX9-G-NEXT: v_or_b32_e32 v17, v13, v15 3422; GFX9-G-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[6:7] 3423; GFX9-G-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[14:15] 3424; GFX9-G-NEXT: s_mov_b64 s[8:9], 0 3425; GFX9-G-NEXT: v_cndmask_b32_e64 v8, v9, v8, s[6:7] 3426; GFX9-G-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 3427; GFX9-G-NEXT: v_or_b32_e32 v18, v9, v8 3428; GFX9-G-NEXT: v_xor_b32_e32 v8, 0x7f, v12 3429; GFX9-G-NEXT: v_or_b32_e32 v16, v8, v14 3430; GFX9-G-NEXT: v_and_b32_e32 v8, 1, v18 3431; GFX9-G-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 3432; GFX9-G-NEXT: v_cndmask_b32_e64 v10, v0, 0, vcc 3433; GFX9-G-NEXT: v_cndmask_b32_e64 v11, v1, 0, vcc 3434; GFX9-G-NEXT: v_cndmask_b32_e64 v8, v2, 0, vcc 3435; GFX9-G-NEXT: v_cndmask_b32_e64 v9, v3, 0, vcc 3436; GFX9-G-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17] 3437; GFX9-G-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 3438; GFX9-G-NEXT: v_or_b32_e32 v16, v18, v16 3439; GFX9-G-NEXT: v_and_b32_e32 v16, 1, v16 3440; GFX9-G-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 3441; GFX9-G-NEXT: s_xor_b64 s[4:5], vcc, -1 3442; GFX9-G-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] 3443; GFX9-G-NEXT: s_cbranch_execz .LBB1_6 3444; GFX9-G-NEXT: ; %bb.1: ; %udiv-bb1 3445; GFX9-G-NEXT: v_add_co_u32_e32 v18, vcc, 1, v12 3446; GFX9-G-NEXT: v_addc_co_u32_e32 v19, vcc, 0, v13, vcc 3447; GFX9-G-NEXT: v_addc_co_u32_e32 v20, vcc, 0, v14, vcc 3448; GFX9-G-NEXT: v_addc_co_u32_e32 v21, vcc, 0, v15, vcc 3449; GFX9-G-NEXT: s_xor_b64 s[4:5], vcc, -1 3450; GFX9-G-NEXT: v_sub_co_u32_e32 v16, vcc, 0x7f, 
v12 3451; GFX9-G-NEXT: v_sub_u32_e32 v8, 64, v16 3452; GFX9-G-NEXT: v_lshrrev_b64 v[8:9], v8, v[0:1] 3453; GFX9-G-NEXT: v_lshlrev_b64 v[10:11], v16, v[2:3] 3454; GFX9-G-NEXT: v_add_u32_e32 v14, 0xffffffc0, v16 3455; GFX9-G-NEXT: v_lshlrev_b64 v[12:13], v16, v[0:1] 3456; GFX9-G-NEXT: v_or_b32_e32 v10, v8, v10 3457; GFX9-G-NEXT: v_or_b32_e32 v11, v9, v11 3458; GFX9-G-NEXT: v_lshlrev_b64 v[8:9], v14, v[0:1] 3459; GFX9-G-NEXT: v_cmp_gt_u32_e32 vcc, 64, v16 3460; GFX9-G-NEXT: s_mov_b64 s[10:11], s[8:9] 3461; GFX9-G-NEXT: v_cndmask_b32_e32 v14, 0, v12, vcc 3462; GFX9-G-NEXT: v_cndmask_b32_e32 v15, 0, v13, vcc 3463; GFX9-G-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc 3464; GFX9-G-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc 3465; GFX9-G-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16 3466; GFX9-G-NEXT: v_mov_b32_e32 v13, s11 3467; GFX9-G-NEXT: v_cndmask_b32_e32 v8, v8, v2, vcc 3468; GFX9-G-NEXT: v_cndmask_b32_e32 v9, v9, v3, vcc 3469; GFX9-G-NEXT: v_mov_b32_e32 v11, s9 3470; GFX9-G-NEXT: v_mov_b32_e32 v10, s8 3471; GFX9-G-NEXT: v_mov_b32_e32 v12, s10 3472; GFX9-G-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] 3473; GFX9-G-NEXT: s_xor_b64 s[12:13], exec, s[8:9] 3474; GFX9-G-NEXT: s_cbranch_execz .LBB1_5 3475; GFX9-G-NEXT: ; %bb.2: ; %udiv-preheader 3476; GFX9-G-NEXT: v_sub_u32_e32 v12, 64, v18 3477; GFX9-G-NEXT: v_add_u32_e32 v22, 0xffffffc0, v18 3478; GFX9-G-NEXT: v_lshrrev_b64 v[10:11], v18, v[0:1] 3479; GFX9-G-NEXT: v_lshlrev_b64 v[12:13], v12, v[2:3] 3480; GFX9-G-NEXT: v_lshrrev_b64 v[16:17], v18, v[2:3] 3481; GFX9-G-NEXT: v_lshrrev_b64 v[2:3], v22, v[2:3] 3482; GFX9-G-NEXT: v_or_b32_e32 v10, v10, v12 3483; GFX9-G-NEXT: v_or_b32_e32 v11, v11, v13 3484; GFX9-G-NEXT: v_cmp_gt_u32_e32 vcc, 64, v18 3485; GFX9-G-NEXT: s_mov_b64 s[8:9], 0 3486; GFX9-G-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc 3487; GFX9-G-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc 3488; GFX9-G-NEXT: v_cndmask_b32_e32 v16, 0, v16, vcc 3489; GFX9-G-NEXT: v_cndmask_b32_e32 v17, 0, v17, vcc 3490; GFX9-G-NEXT: v_add_co_u32_e32 v22, vcc, -1, v4 
3491; GFX9-G-NEXT: v_addc_co_u32_e32 v23, vcc, -1, v5, vcc 3492; GFX9-G-NEXT: s_mov_b64 s[10:11], s[8:9] 3493; GFX9-G-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v18 3494; GFX9-G-NEXT: v_addc_co_u32_e32 v24, vcc, -1, v6, vcc 3495; GFX9-G-NEXT: v_mov_b32_e32 v13, s11 3496; GFX9-G-NEXT: v_cndmask_b32_e64 v2, v2, v0, s[4:5] 3497; GFX9-G-NEXT: v_cndmask_b32_e64 v3, v3, v1, s[4:5] 3498; GFX9-G-NEXT: v_addc_co_u32_e32 v25, vcc, -1, v7, vcc 3499; GFX9-G-NEXT: v_mov_b32_e32 v1, 0 3500; GFX9-G-NEXT: v_mov_b32_e32 v11, s9 3501; GFX9-G-NEXT: v_mov_b32_e32 v10, s8 3502; GFX9-G-NEXT: v_mov_b32_e32 v12, s10 3503; GFX9-G-NEXT: .LBB1_3: ; %udiv-do-while 3504; GFX9-G-NEXT: ; =>This Inner Loop Header: Depth=1 3505; GFX9-G-NEXT: v_lshlrev_b64 v[12:13], 1, v[14:15] 3506; GFX9-G-NEXT: v_lshrrev_b32_e32 v0, 31, v15 3507; GFX9-G-NEXT: v_or_b32_e32 v14, v10, v12 3508; GFX9-G-NEXT: v_or_b32_e32 v15, v11, v13 3509; GFX9-G-NEXT: v_lshlrev_b64 v[10:11], 1, v[16:17] 3510; GFX9-G-NEXT: v_lshrrev_b32_e32 v12, 31, v3 3511; GFX9-G-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 3512; GFX9-G-NEXT: v_or_b32_e32 v10, v10, v12 3513; GFX9-G-NEXT: v_lshrrev_b32_e32 v12, 31, v9 3514; GFX9-G-NEXT: v_or_b32_e32 v2, v2, v12 3515; GFX9-G-NEXT: v_sub_co_u32_e32 v12, vcc, v22, v2 3516; GFX9-G-NEXT: v_subb_co_u32_e32 v12, vcc, v23, v3, vcc 3517; GFX9-G-NEXT: v_subb_co_u32_e32 v12, vcc, v24, v10, vcc 3518; GFX9-G-NEXT: v_subb_co_u32_e32 v12, vcc, v25, v11, vcc 3519; GFX9-G-NEXT: v_ashrrev_i32_e32 v12, 31, v12 3520; GFX9-G-NEXT: v_and_b32_e32 v13, v12, v4 3521; GFX9-G-NEXT: v_and_b32_e32 v16, v12, v5 3522; GFX9-G-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v13 3523; GFX9-G-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v16, vcc 3524; GFX9-G-NEXT: v_and_b32_e32 v13, v12, v6 3525; GFX9-G-NEXT: v_and_b32_e32 v17, v12, v7 3526; GFX9-G-NEXT: v_subb_co_u32_e32 v16, vcc, v10, v13, vcc 3527; GFX9-G-NEXT: v_subb_co_u32_e32 v17, vcc, v11, v17, vcc 3528; GFX9-G-NEXT: v_add_co_u32_e32 v18, vcc, -1, v18 3529; GFX9-G-NEXT: v_addc_co_u32_e32 v19, vcc, -1, v19, vcc 
3530; GFX9-G-NEXT: v_addc_co_u32_e32 v20, vcc, -1, v20, vcc 3531; GFX9-G-NEXT: v_addc_co_u32_e32 v21, vcc, -1, v21, vcc 3532; GFX9-G-NEXT: v_or_b32_e32 v10, v18, v20 3533; GFX9-G-NEXT: v_or_b32_e32 v11, v19, v21 3534; GFX9-G-NEXT: v_lshlrev_b64 v[8:9], 1, v[8:9] 3535; GFX9-G-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] 3536; GFX9-G-NEXT: v_or_b32_e32 v8, v8, v0 3537; GFX9-G-NEXT: v_and_b32_e32 v0, 1, v12 3538; GFX9-G-NEXT: v_mov_b32_e32 v11, v1 3539; GFX9-G-NEXT: s_or_b64 s[8:9], vcc, s[8:9] 3540; GFX9-G-NEXT: v_mov_b32_e32 v10, v0 3541; GFX9-G-NEXT: s_andn2_b64 exec, exec, s[8:9] 3542; GFX9-G-NEXT: s_cbranch_execnz .LBB1_3 3543; GFX9-G-NEXT: ; %bb.4: ; %Flow 3544; GFX9-G-NEXT: s_or_b64 exec, exec, s[8:9] 3545; GFX9-G-NEXT: .LBB1_5: ; %Flow2 3546; GFX9-G-NEXT: s_or_b64 exec, exec, s[12:13] 3547; GFX9-G-NEXT: v_lshlrev_b64 v[0:1], 1, v[14:15] 3548; GFX9-G-NEXT: v_lshlrev_b64 v[8:9], 1, v[8:9] 3549; GFX9-G-NEXT: v_lshrrev_b32_e32 v2, 31, v15 3550; GFX9-G-NEXT: v_or_b32_e32 v8, v8, v2 3551; GFX9-G-NEXT: v_or_b32_e32 v10, v10, v0 3552; GFX9-G-NEXT: v_or_b32_e32 v11, v11, v1 3553; GFX9-G-NEXT: .LBB1_6: ; %Flow3 3554; GFX9-G-NEXT: s_or_b64 exec, exec, s[6:7] 3555; GFX9-G-NEXT: v_mov_b32_e32 v0, v10 3556; GFX9-G-NEXT: v_mov_b32_e32 v1, v11 3557; GFX9-G-NEXT: v_mov_b32_e32 v2, v8 3558; GFX9-G-NEXT: v_mov_b32_e32 v3, v9 3559; GFX9-G-NEXT: s_setpc_b64 s[30:31] 3560; 3561; GFX9-G-O0-LABEL: v_udiv_i128_vv: 3562; GFX9-G-O0: ; %bb.0: ; %_udiv-special-cases 3563; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3564; GFX9-G-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 3565; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill 3566; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5] 3567; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v1 3568; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v2 3569; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v3 3570; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec 3571; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v10 3572; 
GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v9 3573; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v8 3574; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill 3575; GFX9-G-O0-NEXT: s_nop 0 3576; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill 3577; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill 3578; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill 3579; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v5 3580; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v6 3581; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v7 3582; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec 3583; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v10 3584; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v9 3585; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v8 3586; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill 3587; GFX9-G-O0-NEXT: s_nop 0 3588; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill 3589; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill 3590; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill 3591; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0 3592; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v5 3593; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v4 3594; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v7 3595; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v6 3596; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v9 3597; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10 3598; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v12 3599; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v13 3600; GFX9-G-O0-NEXT: v_or_b32_e64 v8, v8, v11 3601; GFX9-G-O0-NEXT: v_or_b32_e64 v10, v9, v10 3602; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 3603; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10 3604; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s5 3605; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s4 3606; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 
s[6:7], v[8:9], v[10:11] 3607; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v1 3608; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v0 3609; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v3 3610; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v2 3611; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v9 3612; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10 3613; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v12 3614; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v13 3615; GFX9-G-O0-NEXT: v_or_b32_e64 v8, v8, v11 3616; GFX9-G-O0-NEXT: v_or_b32_e64 v10, v9, v10 3617; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 3618; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10 3619; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s5 3620; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s4 3621; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], v[10:11] 3622; GFX9-G-O0-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 3623; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v5 3624; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v4 3625; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v7 3626; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v6 3627; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s4 3628; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s5 3629; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], v[4:5] 3630; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v9 3631; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v10 3632; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v4, v4 3633; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v5, v5 3634; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 32 3635; GFX9-G-O0-NEXT: v_add_u32_e64 v5, v5, v6 3636; GFX9-G-O0-NEXT: v_min_u32_e64 v4, v4, v5 3637; GFX9-G-O0-NEXT: s_mov_b32 s10, 64 3638; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s10 3639; GFX9-G-O0-NEXT: v_add_u32_e64 v5, v4, v5 3640; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v7 3641; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v8 3642; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v4, v4 3643; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v6, v6 3644; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 32 3645; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v6, v7 3646; GFX9-G-O0-NEXT: v_min_u32_e64 v4, v4, v6 3647; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[8:9] 3648; GFX9-G-O0-NEXT: s_mov_b32 s14, 0 3649; 
GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v1 3650; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v0 3651; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v3 3652; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v2 3653; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s5 3654; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s4 3655; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], v[5:6] 3656; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v10 3657; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v11 3658; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v5, v5 3659; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v6, v6 3660; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 32 3661; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v6, v7 3662; GFX9-G-O0-NEXT: v_min_u32_e64 v5, v5, v6 3663; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s10 3664; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v5, v6 3665; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v8 3666; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v9 3667; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v5, v5 3668; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v7, v7 3669; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, 32 3670; GFX9-G-O0-NEXT: v_add_u32_e64 v7, v7, v8 3671; GFX9-G-O0-NEXT: v_min_u32_e64 v5, v5, v7 3672; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[8:9] 3673; GFX9-G-O0-NEXT: s_mov_b32 s13, 0 3674; GFX9-G-O0-NEXT: s_mov_b32 s11, 0 3675; GFX9-G-O0-NEXT: s_mov_b32 s12, 0 3676; GFX9-G-O0-NEXT: s_mov_b32 s10, 0 3677; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v5, s[8:9], v4, v5 3678; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill 3679; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s14 3680; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s14 3681; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v6, s[8:9], v4, v6, s[8:9] 3682; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill 3683; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s13 3684; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s12 3685; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v8, s[8:9], v4, v7, s[8:9] 3686; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill 3687; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s11 3688; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s10 3689; 
GFX9-G-O0-NEXT: v_subb_co_u32_e64 v7, s[8:9], v4, v7, s[8:9] 3690; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill 3691; GFX9-G-O0-NEXT: s_mov_b64 s[8:9], 0x7f 3692; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v5 3693; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v6 3694; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v8 3695; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v7 3696; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s9 3697; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s8 3698; GFX9-G-O0-NEXT: v_cmp_gt_u64_e64 s[12:13], v[11:12], v[13:14] 3699; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s5 3700; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s4 3701; GFX9-G-O0-NEXT: v_cmp_gt_u64_e64 s[10:11], v[9:10], v[11:12] 3702; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s5 3703; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s4 3704; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[9:10], v[11:12] 3705; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, 1 3706; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0 3707; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v4, v9, s[12:13] 3708; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, 1 3709; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0 3710; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[10:11] 3711; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v4, v9, s[8:9] 3712; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, 1 3713; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0 3714; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[6:7] 3715; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v9 3716; GFX9-G-O0-NEXT: s_mov_b32 s7, 0x7f 3717; GFX9-G-O0-NEXT: s_mov_b32 s6, 0 3718; GFX9-G-O0-NEXT: v_xor_b32_e64 v5, v5, s7 3719; GFX9-G-O0-NEXT: v_xor_b32_e64 v6, v6, s6 3720; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v5, v8 3721; GFX9-G-O0-NEXT: v_or_b32_e64 v7, v6, v7 3722; GFX9-G-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec 3723; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v7 3724; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s5 3725; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s4 3726; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[5:6], v[7:8] 3727; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v1 3728; GFX9-G-O0-NEXT: 
v_mov_b32_e32 v5, v0 3729; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v3 3730; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v2 3731; GFX9-G-O0-NEXT: v_and_b32_e32 v0, 1, v4 3732; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v0 3733; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0 3734; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, 0 3735; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v5 3736; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v6 3737; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[6:7] 3738; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[6:7] 3739; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3740; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v2 3741; GFX9-G-O0-NEXT: v_and_b32_e32 v2, 1, v4 3742; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v2 3743; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 0 3744; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0 3745; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v7 3746; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v8 3747; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[6:7] 3748; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[6:7] 3749; GFX9-G-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec 3750; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v2 3751; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec 3752; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v5 3753; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v6 3754; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 1 3755; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, 0 3756; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] 3757; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v5 3758; GFX9-G-O0-NEXT: v_and_b32_e32 v4, 1, v4 3759; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v4 3760; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], -1 3761; GFX9-G-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] 3762; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill 3763; GFX9-G-O0-NEXT: s_nop 0 3764; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill 3765; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 
offset:12 ; 4-byte Folded Spill 3766; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill 3767; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], exec 3768; GFX9-G-O0-NEXT: ; implicit-def: $vgpr34 : SGPR spill to VGPR lane 3769; GFX9-G-O0-NEXT: v_writelane_b32 v34, s4, 0 3770; GFX9-G-O0-NEXT: v_writelane_b32 v34, s5, 1 3771; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 3772; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill 3773; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] 3774; GFX9-G-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] 3775; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5] 3776; GFX9-G-O0-NEXT: s_cbranch_execz .LBB1_3 3777; GFX9-G-O0-NEXT: s_branch .LBB1_8 3778; GFX9-G-O0-NEXT: .LBB1_1: ; %Flow 3779; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 3780; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload 3781; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] 3782; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) 3783; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 2 3784; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 3 3785; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5] 3786; GFX9-G-O0-NEXT: ; %bb.2: ; %Flow 3787; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload 3788; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload 3789; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload 3790; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload 3791; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload 3792; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload 3793; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload 3794; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload 3795; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) 3796; GFX9-G-O0-NEXT: 
buffer_store_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill 3797; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) 3798; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill 3799; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) 3800; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill 3801; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) 3802; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill 3803; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) 3804; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill 3805; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) 3806; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill 3807; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) 3808; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill 3809; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) 3810; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill 3811; GFX9-G-O0-NEXT: s_branch .LBB1_5 3812; GFX9-G-O0-NEXT: .LBB1_3: ; %Flow2 3813; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 3814; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload 3815; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] 3816; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) 3817; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 0 3818; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 1 3819; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5] 3820; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload 3821; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload 3822; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload 3823; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload 3824; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) 3825; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill 3826; GFX9-G-O0-NEXT: 
s_waitcnt vmcnt(3) 3827; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill 3828; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) 3829; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill 3830; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) 3831; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill 3832; GFX9-G-O0-NEXT: s_branch .LBB1_9 3833; GFX9-G-O0-NEXT: .LBB1_4: ; %udiv-loop-exit 3834; GFX9-G-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload 3835; GFX9-G-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload 3836; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload 3837; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload 3838; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload 3839; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload 3840; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload 3841; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload 3842; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2) 3843; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v4 3844; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v5 3845; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) 3846; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v6 3847; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v7 3848; GFX9-G-O0-NEXT: s_mov_b32 s4, 1 3849; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s4 3850; GFX9-G-O0-NEXT: v_lshlrev_b64 v[10:11], v0, v[2:3] 3851; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s4 3852; GFX9-G-O0-NEXT: v_lshlrev_b64 v[0:1], v0, v[4:5] 3853; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr2 killed $exec 3854; GFX9-G-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr2_vgpr3 killed $exec 3855; GFX9-G-O0-NEXT: s_mov_b32 s4, 31 3856; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s4 3857; GFX9-G-O0-NEXT: 
v_lshrrev_b32_e64 v6, v2, v3 3858; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0 3859; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v0 3860; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v1 3861; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v14 3862; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v15 3863; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v16 3864; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v17 3865; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v12 3866; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v13 3867; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v10 3868; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v11 3869; GFX9-G-O0-NEXT: v_or_b32_e64 v0, v0, v7 3870; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v1, v5 3871; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 3872; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v5 3873; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v8 3874; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v9 3875; GFX9-G-O0-NEXT: v_or3_b32 v4, v4, v6, v7 3876; GFX9-G-O0-NEXT: v_or3_b32 v2, v2, v3, v5 3877; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 3878; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v2 3879; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec 3880; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v4 3881; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v5 3882; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill 3883; GFX9-G-O0-NEXT: s_nop 0 3884; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill 3885; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill 3886; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill 3887; GFX9-G-O0-NEXT: s_branch .LBB1_3 3888; GFX9-G-O0-NEXT: .LBB1_5: ; %Flow1 3889; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 3890; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload 3891; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] 3892; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) 3893; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 4 3894; 
GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 5 3895; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5] 3896; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload 3897; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload 3898; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload 3899; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload 3900; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload 3901; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload 3902; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload 3903; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload 3904; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) 3905; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill 3906; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) 3907; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill 3908; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) 3909; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill 3910; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) 3911; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill 3912; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill 3913; GFX9-G-O0-NEXT: s_nop 0 3914; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill 3915; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill 3916; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill 3917; GFX9-G-O0-NEXT: s_branch .LBB1_4 3918; GFX9-G-O0-NEXT: .LBB1_6: ; %udiv-do-while 3919; GFX9-G-O0-NEXT: ; =>This Inner Loop Header: Depth=1 3920; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 
3921; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload 3922; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] 3923; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) 3924; GFX9-G-O0-NEXT: v_readlane_b32 s6, v34, 6 3925; GFX9-G-O0-NEXT: v_readlane_b32 s7, v34, 7 3926; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload 3927; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload 3928; GFX9-G-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload 3929; GFX9-G-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload 3930; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload 3931; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload 3932; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload 3933; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload 3934; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload 3935; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload 3936; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload 3937; GFX9-G-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload 3938; GFX9-G-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload 3939; GFX9-G-O0-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload 3940; GFX9-G-O0-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload 3941; GFX9-G-O0-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload 3942; GFX9-G-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload 3943; GFX9-G-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload 3944; 
GFX9-G-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload 3945; GFX9-G-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload 3946; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload 3947; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload 3948; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload 3949; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload 3950; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0 3951; GFX9-G-O0-NEXT: s_waitcnt vmcnt(18) 3952; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v2 3953; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v3 3954; GFX9-G-O0-NEXT: s_waitcnt vmcnt(16) 3955; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v4 3956; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v5 3957; GFX9-G-O0-NEXT: s_mov_b32 s8, 1 3958; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8 3959; GFX9-G-O0-NEXT: v_lshlrev_b64 v[20:21], v2, v[0:1] 3960; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8 3961; GFX9-G-O0-NEXT: v_lshlrev_b64 v[4:5], v2, v[3:4] 3962; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec 3963; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec 3964; GFX9-G-O0-NEXT: s_mov_b32 s9, 31 3965; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s9 3966; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v3, v0, v1 3967; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, 0 3968; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v4 3969; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v5 3970; GFX9-G-O0-NEXT: v_or_b32_e64 v7, v2, v3 3971; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v0, v1 3972; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr12_vgpr13 killed $exec 3973; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v14 3974; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v15 3975; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec 3976; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec 3977; GFX9-G-O0-NEXT: s_mov_b32 s9, 31 3978; GFX9-G-O0-NEXT: 
v_mov_b32_e32 v0, s9 3979; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v3, v0, v1 3980; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, 0 3981; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v20 3982; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v21 3983; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v2, v3 3984; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v0, v1 3985; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v12 3986; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v13 3987; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v14 3988; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v15 3989; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8 3990; GFX9-G-O0-NEXT: v_lshlrev_b64 v[22:23], v0, v[2:3] 3991; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8 3992; GFX9-G-O0-NEXT: v_lshlrev_b64 v[0:1], v0, v[12:13] 3993; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr2 killed $exec 3994; GFX9-G-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr2_vgpr3 killed $exec 3995; GFX9-G-O0-NEXT: s_mov_b32 s8, 31 3996; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8 3997; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v14, v2, v3 3998; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0 3999; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v0 4000; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v1 4001; GFX9-G-O0-NEXT: s_waitcnt vmcnt(10) 4002; GFX9-G-O0-NEXT: v_mov_b32_e32 v28, v30 4003; GFX9-G-O0-NEXT: v_mov_b32_e32 v29, v31 4004; GFX9-G-O0-NEXT: s_waitcnt vmcnt(8) 4005; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v32 4006; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v33 4007; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v28 4008; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v29 4009; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v22 4010; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v23 4011; GFX9-G-O0-NEXT: v_or_b32_e64 v0, v0, v15 4012; GFX9-G-O0-NEXT: v_or_b32_e64 v13, v1, v13 4013; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4014; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v13 4015; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v20 4016; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v21 4017; GFX9-G-O0-NEXT: v_or3_b32 v12, v12, v14, v15 4018; GFX9-G-O0-NEXT: v_or3_b32 v2, v2, v3, v13 4019; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed 
$vgpr12 def $vgpr12_vgpr13 killed $exec 4020; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v2 4021; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec 4022; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v12 4023; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v13 4024; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) 4025; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v11, s[8:9], v11, v4 4026; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v10, s[8:9], v10, v9, s[8:9] 4027; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v8, s[8:9], v8, v7, s[8:9] 4028; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v10, s[8:9], v6, v5, s[8:9] 4029; GFX9-G-O0-NEXT: s_mov_b32 s8, 31 4030; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s8 4031; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v8, v6, v10 4032; GFX9-G-O0-NEXT: s_mov_b32 s8, 31 4033; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s8 4034; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v6, v6, v10 4035; GFX9-G-O0-NEXT: s_mov_b32 s9, 1 4036; GFX9-G-O0-NEXT: s_mov_b32 s8, 0 4037; GFX9-G-O0-NEXT: v_and_b32_e64 v12, v8, s9 4038; GFX9-G-O0-NEXT: v_and_b32_e64 v10, v8, s8 4039; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec 4040; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v10 4041; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s5 4042; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s4 4043; GFX9-G-O0-NEXT: ; kill: def $vgpr12_vgpr13 killed $vgpr12_vgpr13 def $vgpr12_vgpr13_vgpr14_vgpr15 killed $exec 4044; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v11 4045; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v10 4046; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v24 4047; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v25 4048; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v26 4049; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v27 4050; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v22 4051; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v23 4052; GFX9-G-O0-NEXT: v_and_b32_e64 v11, v8, v11 4053; GFX9-G-O0-NEXT: v_and_b32_e64 v10, v8, v10 4054; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v20 4055; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v21 4056; GFX9-G-O0-NEXT: v_and_b32_e64 v8, v6, v8 4057; GFX9-G-O0-NEXT: v_and_b32_e64 v6, 
v6, v20 4058; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v4, s[8:9], v4, v11 4059; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v10, s[8:9], v9, v10, s[8:9] 4060; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v9, s[8:9], v7, v8, s[8:9] 4061; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v8, s[8:9], v5, v6, s[8:9] 4062; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec 4063; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v10 4064; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v9 4065; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v8 4066; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v16 4067; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v17 4068; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v18 4069; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v19 4070; GFX9-G-O0-NEXT: s_mov_b32 s8, -1 4071; GFX9-G-O0-NEXT: s_mov_b32 s12, -1 4072; GFX9-G-O0-NEXT: s_mov_b32 s11, -1 4073; GFX9-G-O0-NEXT: s_mov_b32 s10, -1 4074; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, s8 4075; GFX9-G-O0-NEXT: v_add_co_u32_e64 v16, s[8:9], v11, v16 4076; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s12 4077; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v17, s[8:9], v10, v11, s[8:9] 4078; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s11 4079; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v19, s[8:9], v9, v10, s[8:9] 4080; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s10 4081; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v18, s[8:9], v8, v9, s[8:9] 4082; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v16 4083; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v17 4084; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v19 4085; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v18 4086; GFX9-G-O0-NEXT: v_or_b32_e64 v16, v16, v19 4087; GFX9-G-O0-NEXT: v_or_b32_e64 v18, v17, v18 4088; GFX9-G-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec 4089; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v18 4090; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, s5 4091; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, s4 4092; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[16:17], v[18:19] 4093; GFX9-G-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] 4094; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v3 4095; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v2 4096; 
GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v1 4097; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v0 4098; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill 4099; GFX9-G-O0-NEXT: s_nop 0 4100; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill 4101; GFX9-G-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill 4102; GFX9-G-O0-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill 4103; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v15 4104; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v14 4105; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v13 4106; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v12 4107; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill 4108; GFX9-G-O0-NEXT: s_nop 0 4109; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill 4110; GFX9-G-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill 4111; GFX9-G-O0-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill 4112; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[4:5] 4113; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 2 4114; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 3 4115; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[4:5] 4116; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 6 4117; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 7 4118; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 4119; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill 4120; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] 4121; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill 4122; GFX9-G-O0-NEXT: s_nop 0 4123; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill 4124; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill 4125; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill 4126; 
GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill 4127; GFX9-G-O0-NEXT: s_nop 0 4128; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill 4129; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill 4130; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill 4131; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill 4132; GFX9-G-O0-NEXT: s_nop 0 4133; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill 4134; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill 4135; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill 4136; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill 4137; GFX9-G-O0-NEXT: s_nop 0 4138; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill 4139; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill 4140; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill 4141; GFX9-G-O0-NEXT: s_andn2_b64 exec, exec, s[4:5] 4142; GFX9-G-O0-NEXT: s_cbranch_execnz .LBB1_6 4143; GFX9-G-O0-NEXT: s_branch .LBB1_1 4144; GFX9-G-O0-NEXT: .LBB1_7: ; %udiv-preheader 4145; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 4146; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload 4147; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] 4148; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload 4149; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload 4150; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload 4151; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload 4152; 
GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload 4153; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload 4154; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload 4155; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload 4156; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload 4157; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload 4158; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload 4159; GFX9-G-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload 4160; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload 4161; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload 4162; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload 4163; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload 4164; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload 4165; GFX9-G-O0-NEXT: s_mov_b32 s4, 64 4166; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2) 4167; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v5 4168; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v4 4169; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) 4170; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v7 4171; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v6 4172; GFX9-G-O0-NEXT: s_mov_b32 s5, 0xffffffc0 4173; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s5 4174; GFX9-G-O0-NEXT: v_add_u32_e64 v4, v12, v4 4175; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s4 4176; GFX9-G-O0-NEXT: v_sub_u32_e64 v5, v5, v12 4177; GFX9-G-O0-NEXT: s_mov_b32 s6, 0 4178; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s4 4179; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v12, v6 4180; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s6 4181; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v12, v6 4182; 
GFX9-G-O0-NEXT: v_lshrrev_b64 v[6:7], v12, v[20:21] 4183; GFX9-G-O0-NEXT: v_lshrrev_b64 v[25:26], v12, v[14:15] 4184; GFX9-G-O0-NEXT: v_lshlrev_b64 v[23:24], v5, v[20:21] 4185; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v25 4186; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v26 4187; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v23 4188; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v24 4189; GFX9-G-O0-NEXT: v_or_b32_e64 v13, v13, v22 4190; GFX9-G-O0-NEXT: v_or_b32_e64 v12, v5, v12 4191; GFX9-G-O0-NEXT: s_mov_b64 s[8:9], 0 4192; GFX9-G-O0-NEXT: v_lshrrev_b64 v[20:21], v4, v[20:21] 4193; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v20 4194; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v21 4195; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v13, s[4:5] 4196; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[4:5] 4197; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v14 4198; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v15 4199; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v13, s[6:7] 4200; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v12, v5, v12, s[6:7] 4201; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 4202; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v12 4203; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v6 4204; GFX9-G-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec 4205; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, 0 4206; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 0 4207; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v12, v12, v13, s[4:5] 4208; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5] 4209; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec 4210; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v6 4211; GFX9-G-O0-NEXT: ; kill: def $vgpr4_vgpr5 killed $vgpr4_vgpr5 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec 4212; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v12 4213; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v13 4214; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v16 4215; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v17 4216; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v18 4217; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v19 4218; GFX9-G-O0-NEXT: s_mov_b32 s4, -1 4219; 
GFX9-G-O0-NEXT: s_mov_b32 s10, -1 4220; GFX9-G-O0-NEXT: s_mov_b32 s7, -1 4221; GFX9-G-O0-NEXT: s_mov_b32 s6, -1 4222; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, s4 4223; GFX9-G-O0-NEXT: v_add_co_u32_e64 v15, s[4:5], v15, v16 4224; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill 4225; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s10 4226; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v14, s[4:5], v14, v15, s[4:5] 4227; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill 4228; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s7 4229; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v13, s[4:5], v13, v14, s[4:5] 4230; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill 4231; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s6 4232; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v12, s[4:5], v12, v13, s[4:5] 4233; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill 4234; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], s[8:9] 4235; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[8:9] 4236; GFX9-G-O0-NEXT: v_writelane_b32 v34, s8, 6 4237; GFX9-G-O0-NEXT: v_writelane_b32 v34, s9, 7 4238; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 4239; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill 4240; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] 4241; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s7 4242; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s6 4243; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s5 4244; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s4 4245; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill 4246; GFX9-G-O0-NEXT: s_nop 0 4247; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill 4248; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill 4249; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill 4250; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:212 ; 4-byte Folded 
Spill 4251; GFX9-G-O0-NEXT: s_nop 0 4252; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill 4253; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill 4254; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill 4255; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill 4256; GFX9-G-O0-NEXT: s_nop 0 4257; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill 4258; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill 4259; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill 4260; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill 4261; GFX9-G-O0-NEXT: s_nop 0 4262; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill 4263; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill 4264; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill 4265; GFX9-G-O0-NEXT: s_branch .LBB1_6 4266; GFX9-G-O0-NEXT: .LBB1_8: ; %udiv-bb1 4267; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 4268; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload 4269; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] 4270; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload 4271; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload 4272; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload 4273; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload 4274; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload 4275; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload 4276; GFX9-G-O0-NEXT: 
buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload 4277; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload 4278; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0 4279; GFX9-G-O0-NEXT: s_mov_b32 s6, 1 4280; GFX9-G-O0-NEXT: s_mov_b32 s10, 0 4281; GFX9-G-O0-NEXT: s_mov_b32 s9, 0 4282; GFX9-G-O0-NEXT: s_mov_b32 s8, 0 4283; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s6 4284; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) 4285; GFX9-G-O0-NEXT: v_add_co_u32_e64 v4, s[6:7], v1, v4 4286; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill 4287; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s10 4288; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1) 4289; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v5, s[6:7], v3, v5, s[6:7] 4290; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s9 4291; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v7, s[6:7], v2, v3, s[6:7] 4292; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8 4293; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v6, s[6:7], v0, v2, s[6:7] 4294; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v4 4295; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v5 4296; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v7 4297; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v6 4298; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill 4299; GFX9-G-O0-NEXT: s_nop 0 4300; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill 4301; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill 4302; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill 4303; GFX9-G-O0-NEXT: s_mov_b32 s6, 0x7f 4304; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6 4305; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v3, s[6:7], v0, v1 4306; GFX9-G-O0-NEXT: s_mov_b32 s7, 64 4307; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v9 4308; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v8 4309; GFX9-G-O0-NEXT: s_mov_b32 s6, 0xffffffc0 4310; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6 4311; GFX9-G-O0-NEXT: v_add_u32_e64 v2, v3, v0 4312; GFX9-G-O0-NEXT: 
v_mov_b32_e32 v0, s7 4313; GFX9-G-O0-NEXT: v_sub_u32_e64 v8, v0, v3 4314; GFX9-G-O0-NEXT: s_mov_b32 s6, 0 4315; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7 4316; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[8:9], v3, v0 4317; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6 4318; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v3, v0 4319; GFX9-G-O0-NEXT: v_lshlrev_b64 v[0:1], v3, v[12:13] 4320; GFX9-G-O0-NEXT: v_lshrrev_b64 v[17:18], v8, v[12:13] 4321; GFX9-G-O0-NEXT: v_lshlrev_b64 v[15:16], v3, v[10:11] 4322; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v17 4323; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v18 4324; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v15 4325; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v16 4326; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v9, v14 4327; GFX9-G-O0-NEXT: v_or_b32_e64 v3, v3, v8 4328; GFX9-G-O0-NEXT: v_lshlrev_b64 v[12:13], v2, v[12:13] 4329; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v0 4330; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v1 4331; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, 0 4332; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, 0 4333; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[8:9] 4334; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[8:9] 4335; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 4336; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v2 4337; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v12 4338; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v13 4339; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[8:9] 4340; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[8:9] 4341; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10 4342; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v11 4343; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[6:7] 4344; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[6:7] 4345; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 4346; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v2 4347; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec 4348; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v8 4349; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v9 4350; GFX9-G-O0-NEXT: buffer_store_dword v0, 
off, s[0:3], s32 offset:260 ; 4-byte Folded Spill 4351; GFX9-G-O0-NEXT: s_nop 0 4352; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill 4353; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill 4354; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill 4355; GFX9-G-O0-NEXT: s_mov_b64 s[8:9], s[4:5] 4356; GFX9-G-O0-NEXT: s_mov_b64 s[10:11], s[4:5] 4357; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v7 4358; GFX9-G-O0-NEXT: v_or_b32_e64 v6, v5, v6 4359; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 4360; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v6 4361; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s5 4362; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s4 4363; GFX9-G-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[4:5], v[6:7] 4364; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill 4365; GFX9-G-O0-NEXT: s_nop 0 4366; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill 4367; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill 4368; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill 4369; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8 4370; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s9 4371; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s10 4372; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s11 4373; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill 4374; GFX9-G-O0-NEXT: s_nop 0 4375; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill 4376; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill 4377; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill 4378; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], exec 4379; GFX9-G-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] 4380; GFX9-G-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] 4381; GFX9-G-O0-NEXT: 
v_writelane_b32 v34, s6, 4
; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 5
; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-G-O0-NEXT: s_cbranch_execz .LBB1_5
; GFX9-G-O0-NEXT: s_branch .LBB1_7
; GFX9-G-O0-NEXT: .LBB1_9: ; %udiv-end
; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v3
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2)
; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v4
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1)
; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v5
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v6
; GFX9-G-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-G-O0-NEXT: s_setpc_b64 s[30:31]
  %div = udiv i128 %lhs, %rhs
  ret i128 %div
}

; Signed i128 division of a VGPR operand by the constant 8589934592 (2^33).
; For all four check prefixes the expected code below is a single straight-line
; block (%bb.0) with no branch instructions: a value derived from the sign bits
; of %lhs is added through a 4-dword carry chain, then the sum is shifted.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
define i128 @v_sdiv_i128_v_pow2k(i128 %lhs) {
; GFX9-LABEL: v_sdiv_i128_v_pow2k:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v3
; GFX9-NEXT: v_mov_b32_e32 v5, v4
; GFX9-NEXT: v_lshrrev_b64 v[4:5], 31, v[4:5]
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v4
; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v1, v5, vcc
; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[2:3]
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 1, v4
; GFX9-NEXT: v_ashrrev_i64 v[2:3], 33, v[2:3]
; GFX9-NEXT: v_or_b32_e32 v0, v4, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-O0-LABEL: v_sdiv_i128_v_pow2k:
; GFX9-O0: ; %bb.0:
; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v3
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0
; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v4
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5
; GFX9-O0-NEXT: s_mov_b32 s4, 63
; GFX9-O0-NEXT: v_ashrrev_i64 v[4:5], s4, v[4:5]
; GFX9-O0-NEXT: s_mov_b32 s5, 31
; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], s5, v[4:5]
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v7
; GFX9-O0-NEXT: s_mov_b64 s[8:9], 0
; GFX9-O0-NEXT: s_mov_b32 s6, s8
; GFX9-O0-NEXT: s_mov_b32 s4, s9
; GFX9-O0-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5
; GFX9-O0-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v4, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v4, s6
; GFX9-O0-NEXT: v_addc_co_u32_e32 v5, vcc, v2, v4, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v2, s4
; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v1
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
; GFX9-O0-NEXT: s_mov_b32 s4, 33
; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1]
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
; GFX9-O0-NEXT: v_lshl_or_b32 v0, v2, s5, v0
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v6
; GFX9-O0-NEXT: v_ashrrev_i64 v[3:4], s4, v[3:4]
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6
; GFX9-O0-NEXT: s_mov_b32 s4, 1
; GFX9-O0-NEXT: v_alignbit_b32 v1, v1, v2, s4
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
; GFX9-O0-NEXT: s_mov_b32 s4, 32
; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4], s4, v[3:4]
; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec
; GFX9-O0-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-G-LABEL: v_sdiv_i128_v_pow2k:
; GFX9-G: ; %bb.0:
; GFX9-G-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-G-NEXT: v_ashrrev_i32_e32 v4, 31, v3
; GFX9-G-NEXT: v_mov_b32_e32 v5, v4
; GFX9-G-NEXT: v_lshrrev_b64 v[4:5], 31, v[4:5]
; GFX9-G-NEXT: v_add_co_u32_e32 v0, vcc, v0, v4
; GFX9-G-NEXT: v_addc_co_u32_e32 v4, vcc, v1, v5, vcc
; GFX9-G-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
; GFX9-G-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v3, vcc
; GFX9-G-NEXT: v_lshlrev_b64 v[0:1], 31, v[1:2]
; GFX9-G-NEXT: v_lshrrev_b32_e32 v3, 1, v4
; GFX9-G-NEXT: v_or_b32_e32 v0, v3, v0
; GFX9-G-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-G-NEXT: v_ashrrev_i32_e32 v2, 1, v2
; GFX9-G-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-G-O0-LABEL: v_sdiv_i128_v_pow2k:
; GFX9-G-O0: ; %bb.0:
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v0
; GFX9-G-O0-NEXT: s_mov_b32 s4, 31
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s4
; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v0, v0, v3
; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v0
; GFX9-G-O0-NEXT: s_mov_b32 s4, 31
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s4
; GFX9-G-O0-NEXT: v_lshrrev_b64 v[6:7], v0, v[5:6]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v6
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v7
; GFX9-G-O0-NEXT: s_mov_b32 s8, 0
; GFX9-G-O0-NEXT: s_mov_b32 s5, 0
; GFX9-G-O0-NEXT: v_add_co_u32_e64 v4, s[6:7], v4, v5
; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v1, s[6:7], v1, v0, s[6:7]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8
; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v5, s[6:7], v2, v0, s[6:7]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s5
; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v4, s[6:7], v3, v0, s[6:7]
; GFX9-G-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v4
; GFX9-G-O0-NEXT: s_mov_b32 s5, 1
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s5
; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v0, v0, v1
; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, 0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s4
; GFX9-G-O0-NEXT: v_lshlrev_b64 v[5:6], v2, v[5:6]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v5
; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v6
; GFX9-G-O0-NEXT: v_or_b32_e64 v0, v0, v3
; GFX9-G-O0-NEXT: v_or_b32_e64 v1, v1, v2
; GFX9-G-O0-NEXT: s_mov_b32 s4, 31
; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s4
; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v3, v2, v4
; GFX9-G-O0-NEXT: s_mov_b32 s4, 1
; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s4
; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v2, v2, v4
; GFX9-G-O0-NEXT: s_setpc_b64 s[30:31]
  %div = sdiv i128 %lhs, 8589934592
  ret i128 %div
}

; Unsigned i128 division of a VGPR operand by the constant 8589934592 (2^33).
; For all four check prefixes the expected code below is a single straight-line
; block (%bb.0) built from shifts, ORs and register moves, with no branch
; instructions, and the highest result dword (v3) is written with 0.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
define i128 @v_udiv_i128_v_pow2k(i128 %lhs) {
; GFX9-LABEL: v_udiv_i128_v_pow2k:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v4, v1
; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[2:3]
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v4
; GFX9-NEXT: v_or_b32_e32 v0, v2, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v3
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-O0-LABEL: v_udiv_i128_v_pow2k:
; GFX9-O0: ; %bb.0:
; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v3
; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v5
; GFX9-O0-NEXT: s_mov_b32 s4, 33
; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1]
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
; GFX9-O0-NEXT: s_mov_b32 s5, 31
; GFX9-O0-NEXT: v_lshl_or_b32 v0, v4, s5, v0
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v6
; GFX9-O0-NEXT: v_lshrrev_b64 v[2:3], s4, v[1:2]
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6
; GFX9-O0-NEXT: s_mov_b32 s4, 1
; GFX9-O0-NEXT: v_alignbit_b32 v1, v1, v4, s4
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $vgpr2_vgpr3 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v3, 0
; GFX9-O0-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-G-LABEL: v_udiv_i128_v_pow2k:
; GFX9-G: ; %bb.0:
; GFX9-G-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-G-NEXT: v_mov_b32_e32 v4, v1
; GFX9-G-NEXT: v_lshlrev_b64 v[0:1], 31, v[2:3]
; GFX9-G-NEXT: v_lshrrev_b32_e32 v2, 1, v4
; GFX9-G-NEXT: v_or_b32_e32 v0, v2, v0
; GFX9-G-NEXT: v_lshrrev_b32_e32 v2, 1, v3
; GFX9-G-NEXT: v_mov_b32_e32 v3, 0
; GFX9-G-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-G-O0-LABEL: v_udiv_i128_v_pow2k:
; GFX9-G-O0: ; %bb.0:
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v2
; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v3
; GFX9-G-O0-NEXT: s_mov_b32 s4, 1
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s4
; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v0, v0, v1
; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, 0
; GFX9-G-O0-NEXT: s_mov_b32 s4, 31
; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s4
; GFX9-G-O0-NEXT: v_lshlrev_b64 v[5:6], v2, v[4:5]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v5
; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v6
; GFX9-G-O0-NEXT: v_or_b32_e64 v0, v0, v4
; GFX9-G-O0-NEXT: v_or_b32_e64 v1, v1, v2
; GFX9-G-O0-NEXT: s_mov_b32 s4, 1
; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s4
; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v2, v2, v3
; GFX9-G-O0-NEXT: s_mov_b32 s4, 0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s4
; GFX9-G-O0-NEXT: s_setpc_b64 s[30:31]
  %div = udiv i128 %lhs, 8589934592
  ret i128 %div
}