1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 2; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s 3; RUN: llc -O0 -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GFX9-O0,GFX9-SDAG-O0 %s 4 5; FIXME: GlobalISel missing the power-of-2 cases in legalization. https://github.com/llvm/llvm-project/issues/80671 6; xUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GFX9,GFX9 %s 7; xUN: llc -O0 -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GFX9-O0,GFX9-O0 %s 8 9define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { 10; GFX9-LABEL: v_srem_i128_vv: 11; GFX9: ; %bb.0: ; %_udiv-special-cases 12; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13; GFX9-NEXT: v_sub_co_u32_e32 v8, vcc, 0, v0 14; GFX9-NEXT: v_subb_co_u32_e32 v9, vcc, 0, v1, vcc 15; GFX9-NEXT: v_subb_co_u32_e32 v10, vcc, 0, v2, vcc 16; GFX9-NEXT: v_subb_co_u32_e32 v11, vcc, 0, v3, vcc 17; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[2:3] 18; GFX9-NEXT: v_ashrrev_i32_e32 v20, 31, v3 19; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 20; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 21; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc 22; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc 23; GFX9-NEXT: v_sub_co_u32_e32 v8, vcc, 0, v4 24; GFX9-NEXT: v_subb_co_u32_e32 v9, vcc, 0, v5, vcc 25; GFX9-NEXT: v_subb_co_u32_e32 v10, vcc, 0, v6, vcc 26; GFX9-NEXT: v_subb_co_u32_e32 v11, vcc, 0, v7, vcc 27; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[6:7] 28; GFX9-NEXT: v_mov_b32_e32 v21, v20 29; GFX9-NEXT: v_cndmask_b32_e32 v22, v5, v9, vcc 30; GFX9-NEXT: v_cndmask_b32_e32 v23, v4, v8, vcc 31; GFX9-NEXT: v_cndmask_b32_e32 v5, v7, v11, vcc 32; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v10, vcc 33; GFX9-NEXT: v_or_b32_e32 v7, v22, v5 34; GFX9-NEXT: v_or_b32_e32 v6, v23, v4 35; GFX9-NEXT: 
v_cmp_eq_u64_e32 vcc, 0, v[6:7] 36; GFX9-NEXT: v_or_b32_e32 v7, v1, v3 37; GFX9-NEXT: v_or_b32_e32 v6, v0, v2 38; GFX9-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[6:7] 39; GFX9-NEXT: v_ffbh_u32_e32 v6, v4 40; GFX9-NEXT: v_add_u32_e32 v6, 32, v6 41; GFX9-NEXT: v_ffbh_u32_e32 v7, v5 42; GFX9-NEXT: v_min_u32_e32 v6, v6, v7 43; GFX9-NEXT: v_ffbh_u32_e32 v7, v23 44; GFX9-NEXT: v_add_u32_e32 v7, 32, v7 45; GFX9-NEXT: v_ffbh_u32_e32 v8, v22 46; GFX9-NEXT: v_min_u32_e32 v7, v7, v8 47; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 48; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, 64, v7 49; GFX9-NEXT: v_addc_co_u32_e64 v8, s[6:7], 0, 0, vcc 50; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] 51; GFX9-NEXT: v_ffbh_u32_e32 v10, v3 52; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc 53; GFX9-NEXT: v_ffbh_u32_e32 v7, v2 54; GFX9-NEXT: v_add_u32_e32 v7, 32, v7 55; GFX9-NEXT: v_min_u32_e32 v7, v7, v10 56; GFX9-NEXT: v_ffbh_u32_e32 v10, v0 57; GFX9-NEXT: v_add_u32_e32 v10, 32, v10 58; GFX9-NEXT: v_ffbh_u32_e32 v11, v1 59; GFX9-NEXT: v_min_u32_e32 v10, v10, v11 60; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, 0, vcc 61; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, 64, v10 62; GFX9-NEXT: v_addc_co_u32_e64 v11, s[6:7], 0, 0, vcc 63; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] 64; GFX9-NEXT: v_mov_b32_e32 v9, 0 65; GFX9-NEXT: v_cndmask_b32_e32 v7, v10, v7, vcc 66; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, 0, vcc 67; GFX9-NEXT: v_sub_co_u32_e32 v6, vcc, v6, v7 68; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, v8, v11, vcc 69; GFX9-NEXT: v_subbrev_co_u32_e32 v8, vcc, 0, v9, vcc 70; GFX9-NEXT: v_subbrev_co_u32_e32 v9, vcc, 0, v9, vcc 71; GFX9-NEXT: s_mov_b64 s[6:7], 0x7f 72; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[6:7] 73; GFX9-NEXT: v_or_b32_e32 v12, v7, v9 74; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 75; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] 76; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 77; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] 78; GFX9-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc 79; GFX9-NEXT: v_and_b32_e32 v10, 1, v10 80; 
GFX9-NEXT: v_xor_b32_e32 v11, 0x7f, v6 81; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v10 82; GFX9-NEXT: v_or_b32_e32 v11, v11, v8 83; GFX9-NEXT: s_or_b64 s[4:5], s[4:5], vcc 84; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[11:12] 85; GFX9-NEXT: s_xor_b64 s[6:7], s[4:5], -1 86; GFX9-NEXT: v_cndmask_b32_e64 v10, v3, 0, s[4:5] 87; GFX9-NEXT: v_cndmask_b32_e64 v12, v2, 0, s[4:5] 88; GFX9-NEXT: v_cndmask_b32_e64 v11, v1, 0, s[4:5] 89; GFX9-NEXT: v_cndmask_b32_e64 v13, v0, 0, s[4:5] 90; GFX9-NEXT: s_and_b64 s[4:5], s[6:7], vcc 91; GFX9-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] 92; GFX9-NEXT: s_cbranch_execz .LBB0_6 93; GFX9-NEXT: ; %bb.1: ; %udiv-bb1 94; GFX9-NEXT: v_add_co_u32_e32 v24, vcc, 1, v6 95; GFX9-NEXT: v_addc_co_u32_e32 v25, vcc, 0, v7, vcc 96; GFX9-NEXT: v_addc_co_u32_e32 v26, vcc, 0, v8, vcc 97; GFX9-NEXT: v_sub_u32_e32 v13, 0x7f, v6 98; GFX9-NEXT: v_addc_co_u32_e32 v27, vcc, 0, v9, vcc 99; GFX9-NEXT: v_sub_u32_e32 v11, 64, v13 100; GFX9-NEXT: v_or_b32_e32 v8, v25, v27 101; GFX9-NEXT: v_or_b32_e32 v7, v24, v26 102; GFX9-NEXT: v_lshlrev_b64 v[9:10], v13, v[2:3] 103; GFX9-NEXT: v_lshrrev_b64 v[11:12], v11, v[0:1] 104; GFX9-NEXT: v_sub_u32_e32 v6, 63, v6 105; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[7:8] 106; GFX9-NEXT: v_lshlrev_b64 v[6:7], v6, v[0:1] 107; GFX9-NEXT: v_or_b32_e32 v8, v10, v12 108; GFX9-NEXT: v_or_b32_e32 v9, v9, v11 109; GFX9-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v13 110; GFX9-NEXT: v_lshlrev_b64 v[10:11], v13, v[0:1] 111; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[4:5] 112; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v13 113; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[4:5] 114; GFX9-NEXT: v_mov_b32_e32 v8, 0 115; GFX9-NEXT: v_mov_b32_e32 v12, 0 116; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v3, s[6:7] 117; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v2, s[6:7] 118; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, v11, s[4:5] 119; GFX9-NEXT: v_mov_b32_e32 v9, 0 120; GFX9-NEXT: v_mov_b32_e32 v13, 0 121; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, v10, s[4:5] 122; GFX9-NEXT: s_and_saveexec_b64 s[4:5], 
vcc 123; GFX9-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 124; GFX9-NEXT: s_cbranch_execz .LBB0_5 125; GFX9-NEXT: ; %bb.2: ; %udiv-preheader 126; GFX9-NEXT: v_sub_u32_e32 v12, 64, v24 127; GFX9-NEXT: v_lshrrev_b64 v[8:9], v24, v[0:1] 128; GFX9-NEXT: v_lshlrev_b64 v[12:13], v12, v[2:3] 129; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v24 130; GFX9-NEXT: v_or_b32_e32 v12, v8, v12 131; GFX9-NEXT: v_subrev_u32_e32 v8, 64, v24 132; GFX9-NEXT: v_or_b32_e32 v13, v9, v13 133; GFX9-NEXT: v_lshrrev_b64 v[8:9], v8, v[2:3] 134; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v24 135; GFX9-NEXT: v_cndmask_b32_e32 v9, v9, v13, vcc 136; GFX9-NEXT: v_cndmask_b32_e64 v15, v9, v1, s[4:5] 137; GFX9-NEXT: v_cndmask_b32_e32 v12, v8, v12, vcc 138; GFX9-NEXT: v_lshrrev_b64 v[8:9], v24, v[2:3] 139; GFX9-NEXT: v_cndmask_b32_e64 v14, v12, v0, s[4:5] 140; GFX9-NEXT: v_cndmask_b32_e32 v17, 0, v9, vcc 141; GFX9-NEXT: v_cndmask_b32_e32 v16, 0, v8, vcc 142; GFX9-NEXT: v_add_co_u32_e32 v28, vcc, -1, v23 143; GFX9-NEXT: v_addc_co_u32_e32 v29, vcc, -1, v22, vcc 144; GFX9-NEXT: v_addc_co_u32_e32 v30, vcc, -1, v4, vcc 145; GFX9-NEXT: v_mov_b32_e32 v18, 0 146; GFX9-NEXT: v_mov_b32_e32 v12, 0 147; GFX9-NEXT: v_addc_co_u32_e32 v31, vcc, -1, v5, vcc 148; GFX9-NEXT: s_mov_b64 s[4:5], 0 149; GFX9-NEXT: v_mov_b32_e32 v19, 0 150; GFX9-NEXT: v_mov_b32_e32 v13, 0 151; GFX9-NEXT: v_mov_b32_e32 v9, 0 152; GFX9-NEXT: .LBB0_3: ; %udiv-do-while 153; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 154; GFX9-NEXT: v_lshrrev_b32_e32 v32, 31, v15 155; GFX9-NEXT: v_lshlrev_b64 v[14:15], 1, v[14:15] 156; GFX9-NEXT: v_lshrrev_b32_e32 v33, 31, v7 157; GFX9-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7] 158; GFX9-NEXT: v_lshrrev_b32_e32 v8, 31, v11 159; GFX9-NEXT: v_lshlrev_b64 v[16:17], 1, v[16:17] 160; GFX9-NEXT: v_or_b32_e32 v14, v14, v33 161; GFX9-NEXT: v_or3_b32 v6, v6, v8, v12 162; GFX9-NEXT: v_sub_co_u32_e32 v8, vcc, v28, v14 163; GFX9-NEXT: v_or_b32_e32 v16, v16, v32 164; GFX9-NEXT: v_subb_co_u32_e32 v8, vcc, v29, v15, vcc 165; GFX9-NEXT: 
v_subb_co_u32_e32 v8, vcc, v30, v16, vcc 166; GFX9-NEXT: v_lshlrev_b64 v[10:11], 1, v[10:11] 167; GFX9-NEXT: v_subb_co_u32_e32 v8, vcc, v31, v17, vcc 168; GFX9-NEXT: v_ashrrev_i32_e32 v8, 31, v8 169; GFX9-NEXT: v_or_b32_e32 v10, v18, v10 170; GFX9-NEXT: v_and_b32_e32 v18, v8, v23 171; GFX9-NEXT: v_or_b32_e32 v11, v19, v11 172; GFX9-NEXT: v_and_b32_e32 v19, v8, v22 173; GFX9-NEXT: v_sub_co_u32_e32 v14, vcc, v14, v18 174; GFX9-NEXT: v_and_b32_e32 v32, v8, v4 175; GFX9-NEXT: v_subb_co_u32_e32 v15, vcc, v15, v19, vcc 176; GFX9-NEXT: v_and_b32_e32 v33, v8, v5 177; GFX9-NEXT: v_subb_co_u32_e32 v16, vcc, v16, v32, vcc 178; GFX9-NEXT: v_subb_co_u32_e32 v17, vcc, v17, v33, vcc 179; GFX9-NEXT: v_add_co_u32_e32 v24, vcc, -1, v24 180; GFX9-NEXT: v_addc_co_u32_e32 v25, vcc, -1, v25, vcc 181; GFX9-NEXT: v_addc_co_u32_e32 v26, vcc, -1, v26, vcc 182; GFX9-NEXT: v_addc_co_u32_e32 v27, vcc, -1, v27, vcc 183; GFX9-NEXT: v_or_b32_e32 v18, v24, v26 184; GFX9-NEXT: v_or_b32_e32 v19, v25, v27 185; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[18:19] 186; GFX9-NEXT: v_and_b32_e32 v8, 1, v8 187; GFX9-NEXT: v_mov_b32_e32 v19, v9 188; GFX9-NEXT: v_or3_b32 v7, v7, 0, v13 189; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 190; GFX9-NEXT: v_mov_b32_e32 v18, v8 191; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 192; GFX9-NEXT: s_cbranch_execnz .LBB0_3 193; GFX9-NEXT: ; %bb.4: ; %Flow 194; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 195; GFX9-NEXT: .LBB0_5: ; %Flow2 196; GFX9-NEXT: s_or_b64 exec, exec, s[6:7] 197; GFX9-NEXT: v_lshlrev_b64 v[14:15], 1, v[10:11] 198; GFX9-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7] 199; GFX9-NEXT: v_lshrrev_b32_e32 v11, 31, v11 200; GFX9-NEXT: v_or3_b32 v10, v7, 0, v13 201; GFX9-NEXT: v_or3_b32 v12, v6, v11, v12 202; GFX9-NEXT: v_or_b32_e32 v11, v9, v15 203; GFX9-NEXT: v_or_b32_e32 v13, v8, v14 204; GFX9-NEXT: .LBB0_6: ; %Flow3 205; GFX9-NEXT: s_or_b64 exec, exec, s[8:9] 206; GFX9-NEXT: v_mul_lo_u32 v16, v13, v5 207; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v23, v13, 0 208; GFX9-NEXT: 
v_mov_b32_e32 v15, 0 209; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v13, v4, 0 210; GFX9-NEXT: v_mov_b32_e32 v14, v6 211; GFX9-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v22, v13, v[14:15] 212; GFX9-NEXT: v_mul_lo_u32 v9, v11, v4 213; GFX9-NEXT: v_mul_lo_u32 v10, v10, v23 214; GFX9-NEXT: v_mov_b32_e32 v4, v14 215; GFX9-NEXT: v_mov_b32_e32 v14, v15 216; GFX9-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v23, v11, v[13:14] 217; GFX9-NEXT: v_add3_u32 v8, v8, v16, v9 218; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v12, v23, v[7:8] 219; GFX9-NEXT: v_mov_b32_e32 v8, v14 220; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v4, v8 221; GFX9-NEXT: v_addc_co_u32_e64 v9, s[4:5], 0, 0, vcc 222; GFX9-NEXT: v_mul_lo_u32 v12, v12, v22 223; GFX9-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v22, v11, v[8:9] 224; GFX9-NEXT: v_add3_u32 v4, v10, v7, v12 225; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v8, v6 226; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v9, v4, vcc 227; GFX9-NEXT: v_mov_b32_e32 v7, v13 228; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v5 229; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v7, vcc 230; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v6, vcc 231; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v4, vcc 232; GFX9-NEXT: v_xor_b32_e32 v0, v0, v20 233; GFX9-NEXT: v_xor_b32_e32 v1, v1, v21 234; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v20 235; GFX9-NEXT: v_xor_b32_e32 v2, v2, v20 236; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v21, vcc 237; GFX9-NEXT: v_xor_b32_e32 v3, v3, v21 238; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v20, vcc 239; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v21, vcc 240; GFX9-NEXT: s_setpc_b64 s[30:31] 241; 242; GFX9-O0-LABEL: v_srem_i128_vv: 243; GFX9-O0: ; %bb.0: ; %_udiv-special-cases 244; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 245; GFX9-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 246; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill 247; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] 248; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:116 ; 4-byte 
Folded Spill 249; GFX9-O0-NEXT: v_mov_b32_e32 v17, v6 250; GFX9-O0-NEXT: v_mov_b32_e32 v7, v4 251; GFX9-O0-NEXT: v_mov_b32_e32 v11, v2 252; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1 253; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload 254; GFX9-O0-NEXT: ; implicit-def: $sgpr4 255; GFX9-O0-NEXT: ; implicit-def: $sgpr4 256; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec 257; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 258; GFX9-O0-NEXT: v_mov_b32_e32 v18, v1 259; GFX9-O0-NEXT: ; implicit-def: $sgpr4 260; GFX9-O0-NEXT: ; implicit-def: $sgpr4 261; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec 262; GFX9-O0-NEXT: v_mov_b32_e32 v8, v5 263; GFX9-O0-NEXT: ; implicit-def: $sgpr4 264; GFX9-O0-NEXT: ; implicit-def: $sgpr4 265; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 266; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 267; GFX9-O0-NEXT: ; implicit-def: $sgpr4 268; GFX9-O0-NEXT: ; implicit-def: $sgpr4 269; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec 270; GFX9-O0-NEXT: v_mov_b32_e32 v12, v3 271; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 272; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 273; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 274; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 275; GFX9-O0-NEXT: s_mov_b32 s4, 63 276; GFX9-O0-NEXT: v_mov_b32_e32 v2, v11 277; GFX9-O0-NEXT: v_mov_b32_e32 v3, v12 278; GFX9-O0-NEXT: v_ashrrev_i64 v[2:3], s4, v[2:3] 279; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill 280; GFX9-O0-NEXT: s_nop 0 281; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill 282; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill 283; GFX9-O0-NEXT: s_nop 0 284; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill 285; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 286; GFX9-O0-NEXT: v_mov_b32_e32 v3, 
v1 287; GFX9-O0-NEXT: v_mov_b32_e32 v0, v11 288; GFX9-O0-NEXT: v_mov_b32_e32 v1, v12 289; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 290; GFX9-O0-NEXT: ; implicit-def: $vgpr30 : SGPR spill to VGPR lane 291; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 0 292; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 1 293; GFX9-O0-NEXT: s_mov_b32 s10, s6 294; GFX9-O0-NEXT: v_writelane_b32 v30, s10, 2 295; GFX9-O0-NEXT: s_mov_b32 s11, s7 296; GFX9-O0-NEXT: v_writelane_b32 v30, s11, 3 297; GFX9-O0-NEXT: v_sub_co_u32_e32 v9, vcc, s10, v2 298; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11 299; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v4, v3, vcc 300; GFX9-O0-NEXT: v_mov_b32_e32 v4, s10 301; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v4, v0, vcc 302; GFX9-O0-NEXT: v_mov_b32_e32 v6, s11 303; GFX9-O0-NEXT: v_subb_co_u32_e32 v6, vcc, v6, v1, vcc 304; GFX9-O0-NEXT: ; implicit-def: $sgpr4 305; GFX9-O0-NEXT: ; implicit-def: $sgpr4 306; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec 307; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5 308; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 309; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[6:7] 310; GFX9-O0-NEXT: v_cmp_lt_i64_e64 s[4:5], v[11:12], s[4:5] 311; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[4:5] 312; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 313; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5] 314; GFX9-O0-NEXT: ; implicit-def: $sgpr8 315; GFX9-O0-NEXT: ; implicit-def: $sgpr8 316; GFX9-O0-NEXT: v_mov_b32_e32 v15, v2 317; GFX9-O0-NEXT: v_mov_b32_e32 v16, v3 318; GFX9-O0-NEXT: ; implicit-def: $sgpr8 319; GFX9-O0-NEXT: ; implicit-def: $sgpr8 320; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 321; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6 322; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 323; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5] 324; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec 325; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5] 326; GFX9-O0-NEXT: ; implicit-def: $sgpr4 327; GFX9-O0-NEXT: ; implicit-def: 
$sgpr4 328; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0 329; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1 330; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7 331; GFX9-O0-NEXT: v_mov_b32_e32 v9, v8 332; GFX9-O0-NEXT: v_mov_b32_e32 v7, v17 333; GFX9-O0-NEXT: v_mov_b32_e32 v8, v18 334; GFX9-O0-NEXT: v_sub_co_u32_e32 v13, vcc, s10, v6 335; GFX9-O0-NEXT: v_mov_b32_e32 v10, s11 336; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v10, v9, vcc 337; GFX9-O0-NEXT: v_mov_b32_e32 v10, s10 338; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v7, vcc 339; GFX9-O0-NEXT: v_mov_b32_e32 v12, s11 340; GFX9-O0-NEXT: v_subb_co_u32_e32 v12, vcc, v12, v8, vcc 341; GFX9-O0-NEXT: ; implicit-def: $sgpr4 342; GFX9-O0-NEXT: ; implicit-def: $sgpr4 343; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec 344; GFX9-O0-NEXT: v_mov_b32_e32 v14, v11 345; GFX9-O0-NEXT: v_mov_b32_e32 v11, v14 346; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[6:7] 347; GFX9-O0-NEXT: v_cmp_lt_i64_e64 s[4:5], v[17:18], s[4:5] 348; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, v9, v11, s[4:5] 349; GFX9-O0-NEXT: v_mov_b32_e32 v11, v13 350; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v11, s[4:5] 351; GFX9-O0-NEXT: ; implicit-def: $sgpr8 352; GFX9-O0-NEXT: ; implicit-def: $sgpr8 353; GFX9-O0-NEXT: v_mov_b32_e32 v17, v6 354; GFX9-O0-NEXT: v_mov_b32_e32 v18, v9 355; GFX9-O0-NEXT: ; implicit-def: $sgpr8 356; GFX9-O0-NEXT: ; implicit-def: $sgpr8 357; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec 358; GFX9-O0-NEXT: v_mov_b32_e32 v11, v12 359; GFX9-O0-NEXT: v_mov_b32_e32 v12, v11 360; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v8, v12, s[4:5] 361; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 killed $vgpr10_vgpr11 killed $exec 362; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[4:5] 363; GFX9-O0-NEXT: ; implicit-def: $sgpr4 364; GFX9-O0-NEXT: ; implicit-def: $sgpr4 365; GFX9-O0-NEXT: v_mov_b32_e32 v10, v7 366; GFX9-O0-NEXT: v_mov_b32_e32 v11, v8 367; GFX9-O0-NEXT: v_mov_b32_e32 v13, v5 368; GFX9-O0-NEXT: v_mov_b32_e32 v12, v4 369; 
GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill 370; GFX9-O0-NEXT: s_nop 0 371; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill 372; GFX9-O0-NEXT: v_mov_b32_e32 v12, v15 373; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16 374; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill 375; GFX9-O0-NEXT: s_nop 0 376; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill 377; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11 378; GFX9-O0-NEXT: v_mov_b32_e32 v12, v10 379; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill 380; GFX9-O0-NEXT: s_nop 0 381; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill 382; GFX9-O0-NEXT: v_mov_b32_e32 v12, v17 383; GFX9-O0-NEXT: v_mov_b32_e32 v13, v18 384; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill 385; GFX9-O0-NEXT: s_nop 0 386; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill 387; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11 388; GFX9-O0-NEXT: v_mov_b32_e32 v12, v10 389; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill 390; GFX9-O0-NEXT: s_nop 0 391; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill 392; GFX9-O0-NEXT: v_mov_b32_e32 v12, v17 393; GFX9-O0-NEXT: v_mov_b32_e32 v13, v18 394; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill 395; GFX9-O0-NEXT: s_nop 0 396; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill 397; GFX9-O0-NEXT: v_mov_b32_e32 v13, v5 398; GFX9-O0-NEXT: v_mov_b32_e32 v12, v4 399; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill 400; GFX9-O0-NEXT: s_nop 0 401; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill 402; GFX9-O0-NEXT: 
v_mov_b32_e32 v12, v15 403; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16 404; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill 405; GFX9-O0-NEXT: s_nop 0 406; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill 407; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11 408; GFX9-O0-NEXT: v_mov_b32_e32 v12, v18 409; GFX9-O0-NEXT: v_or_b32_e64 v14, v12, v13 410; GFX9-O0-NEXT: v_mov_b32_e32 v13, v10 411; GFX9-O0-NEXT: v_mov_b32_e32 v12, v17 412; GFX9-O0-NEXT: v_or_b32_e64 v12, v12, v13 413; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec 414; GFX9-O0-NEXT: v_mov_b32_e32 v13, v14 415; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[12:13], s[6:7] 416; GFX9-O0-NEXT: v_mov_b32_e32 v13, v5 417; GFX9-O0-NEXT: v_mov_b32_e32 v12, v16 418; GFX9-O0-NEXT: v_or_b32_e64 v14, v12, v13 419; GFX9-O0-NEXT: v_mov_b32_e32 v13, v4 420; GFX9-O0-NEXT: v_mov_b32_e32 v12, v15 421; GFX9-O0-NEXT: v_or_b32_e64 v12, v12, v13 422; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec 423; GFX9-O0-NEXT: v_mov_b32_e32 v13, v14 424; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[12:13], s[6:7] 425; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] 426; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] 427; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[10:11], s[8:9] 428; GFX9-O0-NEXT: v_ffbh_u32_e64 v7, v7 429; GFX9-O0-NEXT: s_mov_b32 s13, 32 430; GFX9-O0-NEXT: v_add_u32_e64 v7, v7, s13 431; GFX9-O0-NEXT: v_ffbh_u32_e64 v8, v8 432; GFX9-O0-NEXT: v_min_u32_e64 v7, v7, v8 433; GFX9-O0-NEXT: s_mov_b32 s12, 0 434; GFX9-O0-NEXT: ; implicit-def: $sgpr14 435; GFX9-O0-NEXT: v_mov_b32_e32 v10, s12 436; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec 437; GFX9-O0-NEXT: v_mov_b32_e32 v8, v10 438; GFX9-O0-NEXT: v_mov_b32_e32 v11, v8 439; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v6 440; GFX9-O0-NEXT: v_add_u32_e64 v6, v6, s13 441; GFX9-O0-NEXT: v_ffbh_u32_e64 v9, v9 442; GFX9-O0-NEXT: v_min_u32_e64 v12, v6, v9 443; 
GFX9-O0-NEXT: ; implicit-def: $sgpr14 444; GFX9-O0-NEXT: v_mov_b32_e32 v6, s12 445; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec 446; GFX9-O0-NEXT: v_mov_b32_e32 v13, v6 447; GFX9-O0-NEXT: s_mov_b64 s[14:15], 64 448; GFX9-O0-NEXT: v_mov_b32_e32 v9, v12 449; GFX9-O0-NEXT: s_mov_b32 s16, s14 450; GFX9-O0-NEXT: v_mov_b32_e32 v6, v13 451; GFX9-O0-NEXT: s_mov_b32 s18, s15 452; GFX9-O0-NEXT: v_add_co_u32_e64 v9, s[16:17], v9, s16 453; GFX9-O0-NEXT: v_mov_b32_e32 v10, s18 454; GFX9-O0-NEXT: v_addc_co_u32_e64 v6, s[16:17], v6, v10, s[16:17] 455; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec 456; GFX9-O0-NEXT: v_mov_b32_e32 v10, v6 457; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10 458; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v11, s[8:9] 459; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7 460; GFX9-O0-NEXT: v_mov_b32_e32 v7, v9 461; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v7, v8, s[8:9] 462; GFX9-O0-NEXT: ; implicit-def: $sgpr8 463; GFX9-O0-NEXT: ; implicit-def: $sgpr8 464; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 465; GFX9-O0-NEXT: v_mov_b32_e32 v9, v6 466; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] 467; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[4:5], s[8:9] 468; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v0 469; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s13 470; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v1 471; GFX9-O0-NEXT: v_min_u32_e64 v5, v4, v5 472; GFX9-O0-NEXT: ; implicit-def: $sgpr16 473; GFX9-O0-NEXT: v_mov_b32_e32 v4, s12 474; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec 475; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 476; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 477; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v2 478; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s13 479; GFX9-O0-NEXT: v_ffbh_u32_e64 v10, v3 480; GFX9-O0-NEXT: v_min_u32_e64 v11, v4, v10 481; GFX9-O0-NEXT: ; implicit-def: $sgpr13 482; GFX9-O0-NEXT: v_mov_b32_e32 v4, s12 483; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed 
$exec 484; GFX9-O0-NEXT: v_mov_b32_e32 v12, v4 485; GFX9-O0-NEXT: v_mov_b32_e32 v10, v11 486; GFX9-O0-NEXT: s_mov_b32 s12, s14 487; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12 488; GFX9-O0-NEXT: s_mov_b32 s14, s15 489; GFX9-O0-NEXT: v_add_co_u32_e64 v10, s[12:13], v10, s12 490; GFX9-O0-NEXT: v_mov_b32_e32 v11, s14 491; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[12:13], v4, v11, s[12:13] 492; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec 493; GFX9-O0-NEXT: v_mov_b32_e32 v11, v4 494; GFX9-O0-NEXT: v_mov_b32_e32 v4, v11 495; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[8:9] 496; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 497; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 498; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[8:9] 499; GFX9-O0-NEXT: ; implicit-def: $sgpr8 500; GFX9-O0-NEXT: ; implicit-def: $sgpr8 501; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec 502; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 503; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 504; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 505; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec 506; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 507; GFX9-O0-NEXT: v_sub_co_u32_e32 v4, vcc, v4, v7 508; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v5, v6, vcc 509; GFX9-O0-NEXT: v_mov_b32_e32 v6, s10 510; GFX9-O0-NEXT: v_mov_b32_e32 v5, s10 511; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v6, vcc 512; GFX9-O0-NEXT: v_mov_b32_e32 v6, s11 513; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11 514; GFX9-O0-NEXT: v_subb_co_u32_e32 v6, vcc, v5, v6, vcc 515; GFX9-O0-NEXT: ; implicit-def: $sgpr8 516; GFX9-O0-NEXT: ; implicit-def: $sgpr8 517; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 518; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8 519; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill 520; GFX9-O0-NEXT: s_nop 0 521; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill 522; GFX9-O0-NEXT: ; implicit-def: $sgpr8 
523; GFX9-O0-NEXT: ; implicit-def: $sgpr8 524; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec 525; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6 526; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill 527; GFX9-O0-NEXT: s_nop 0 528; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill 529; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7] 530; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f 531; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13] 532; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15] 533; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7] 534; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15] 535; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9] 536; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6 537; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v6, 1 538; GFX9-O0-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9] 539; GFX9-O0-NEXT: s_mov_b64 s[4:5], -1 540; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5] 541; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 542; GFX9-O0-NEXT: s_mov_b32 s14, s13 543; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14 544; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13 545; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12 546; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 547; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6 548; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 549; GFX9-O0-NEXT: v_mov_b32_e32 v9, v8 550; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v9 551; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec 552; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 553; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 554; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 555; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6 556; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7] 557; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11 558; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[8:9] 559; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10 560; GFX9-O0-NEXT: 
v_cndmask_b32_e64 v0, v0, v1, s[8:9] 561; GFX9-O0-NEXT: ; implicit-def: $sgpr12 562; GFX9-O0-NEXT: ; implicit-def: $sgpr12 563; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 564; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4 565; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11 566; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[8:9] 567; GFX9-O0-NEXT: v_mov_b32_e32 v3, s10 568; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[8:9] 569; GFX9-O0-NEXT: ; implicit-def: $sgpr8 570; GFX9-O0-NEXT: ; implicit-def: $sgpr8 571; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 572; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4 573; GFX9-O0-NEXT: s_and_b64 s[6:7], s[4:5], s[6:7] 574; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill 575; GFX9-O0-NEXT: s_nop 0 576; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill 577; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill 578; GFX9-O0-NEXT: s_nop 0 579; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill 580; GFX9-O0-NEXT: s_mov_b64 s[4:5], exec 581; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 4 582; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 5 583; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 584; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill 585; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] 586; GFX9-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] 587; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] 588; GFX9-O0-NEXT: s_cbranch_execz .LBB0_3 589; GFX9-O0-NEXT: s_branch .LBB0_8 590; GFX9-O0-NEXT: .LBB0_1: ; %Flow 591; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 592; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload 593; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] 594; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 595; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 6 596; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 7 597; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] 598; 
GFX9-O0-NEXT: ; %bb.2: ; %Flow 599; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload 600; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload 601; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload 602; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload 603; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload 604; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload 605; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload 606; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload 607; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 608; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill 609; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 610; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill 611; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 612; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill 613; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 614; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill 615; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 616; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill 617; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 618; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill 619; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 620; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill 621; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 622; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill 623; GFX9-O0-NEXT: s_branch .LBB0_5 624; GFX9-O0-NEXT: .LBB0_3: ; %Flow2 625; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 626; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte 
Folded Reload 627; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] 628; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 629; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 4 630; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 5 631; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] 632; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload 633; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload 634; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload 635; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload 636; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 637; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill 638; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 639; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill 640; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill 641; GFX9-O0-NEXT: s_nop 0 642; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill 643; GFX9-O0-NEXT: s_branch .LBB0_9 644; GFX9-O0-NEXT: .LBB0_4: ; %udiv-loop-exit 645; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload 646; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload 647; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload 648; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload 649; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload 650; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload 651; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload 652; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload 653; GFX9-O0-NEXT: s_mov_b32 s4, 1 654; GFX9-O0-NEXT: s_waitcnt vmcnt(2) 655; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], 
s4, v[0:1] 656; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 657; GFX9-O0-NEXT: v_lshlrev_b64 v[9:10], s4, v[9:10] 658; GFX9-O0-NEXT: s_mov_b32 s4, 63 659; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] 660; GFX9-O0-NEXT: v_mov_b32_e32 v11, v1 661; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10 662; GFX9-O0-NEXT: v_mov_b32_e32 v12, v8 663; GFX9-O0-NEXT: v_or3_b32 v4, v4, v11, v12 664; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 665; GFX9-O0-NEXT: v_mov_b32_e32 v0, v9 666; GFX9-O0-NEXT: v_or3_b32 v0, v0, v1, v7 667; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 668; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4 669; GFX9-O0-NEXT: v_mov_b32_e32 v7, v3 670; GFX9-O0-NEXT: v_mov_b32_e32 v4, v6 671; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v7 672; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 673; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 674; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 675; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 676; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4 677; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill 678; GFX9-O0-NEXT: s_nop 0 679; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill 680; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill 681; GFX9-O0-NEXT: s_nop 0 682; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill 683; GFX9-O0-NEXT: s_branch .LBB0_3 684; GFX9-O0-NEXT: .LBB0_5: ; %Flow1 685; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 686; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload 687; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] 688; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 689; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 8 690; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 9 691; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] 692; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload 693; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload 
694; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload 695; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload 696; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload 697; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload 698; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload 699; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload 700; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 701; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill 702; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 703; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill 704; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill 705; GFX9-O0-NEXT: s_nop 0 706; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill 707; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill 708; GFX9-O0-NEXT: s_nop 0 709; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill 710; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill 711; GFX9-O0-NEXT: s_nop 0 712; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill 713; GFX9-O0-NEXT: s_branch .LBB0_4 714; GFX9-O0-NEXT: .LBB0_6: ; %udiv-do-while 715; GFX9-O0-NEXT: ; =>This Inner Loop Header: Depth=1 716; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 717; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload 718; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] 719; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 720; GFX9-O0-NEXT: v_readlane_b32 s6, v30, 10 721; GFX9-O0-NEXT: v_readlane_b32 s7, v30, 11 722; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload 723; GFX9-O0-NEXT: 
buffer_load_dword v7, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload 724; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload 725; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload 726; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload 727; GFX9-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload 728; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload 729; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload 730; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload 731; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload 732; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload 733; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload 734; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload 735; GFX9-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload 736; GFX9-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload 737; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload 738; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload 739; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload 740; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload 741; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload 742; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload 743; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload 744; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:304 ; 
4-byte Folded Reload 745; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload 746; GFX9-O0-NEXT: s_mov_b32 s4, 63 747; GFX9-O0-NEXT: s_waitcnt vmcnt(16) 748; GFX9-O0-NEXT: v_lshrrev_b64 v[28:29], s4, v[2:3] 749; GFX9-O0-NEXT: v_mov_b32_e32 v5, v29 750; GFX9-O0-NEXT: s_mov_b32 s5, 1 751; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], s5, v[22:23] 752; GFX9-O0-NEXT: v_mov_b32_e32 v4, v23 753; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 754; GFX9-O0-NEXT: v_mov_b32_e32 v10, v28 755; GFX9-O0-NEXT: v_mov_b32_e32 v5, v22 756; GFX9-O0-NEXT: v_or_b32_e64 v22, v5, v10 757; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec 758; GFX9-O0-NEXT: v_mov_b32_e32 v23, v4 759; GFX9-O0-NEXT: v_lshlrev_b64 v[28:29], s5, v[2:3] 760; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], s4, v[6:7] 761; GFX9-O0-NEXT: v_mov_b32_e32 v2, v29 762; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5 763; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 764; GFX9-O0-NEXT: v_mov_b32_e32 v3, v28 765; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec 766; GFX9-O0-NEXT: v_or_b32_e64 v4, v3, v4 767; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 768; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2 769; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s5, v[0:1] 770; GFX9-O0-NEXT: v_lshlrev_b64 v[28:29], s5, v[6:7] 771; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] 772; GFX9-O0-NEXT: v_mov_b32_e32 v7, v1 773; GFX9-O0-NEXT: v_mov_b32_e32 v6, v29 774; GFX9-O0-NEXT: s_waitcnt vmcnt(10) 775; GFX9-O0-NEXT: v_mov_b32_e32 v10, v27 776; GFX9-O0-NEXT: v_or3_b32 v6, v6, v7, v10 777; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 778; GFX9-O0-NEXT: v_mov_b32_e32 v0, v28 779; GFX9-O0-NEXT: v_mov_b32_e32 v7, v26 780; GFX9-O0-NEXT: v_or3_b32 v0, v0, v1, v7 781; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 782; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6 783; GFX9-O0-NEXT: v_mov_b32_e32 v7, v3 784; GFX9-O0-NEXT: s_waitcnt vmcnt(8) 785; GFX9-O0-NEXT: v_mov_b32_e32 
v6, v25 786; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7 787; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 788; GFX9-O0-NEXT: v_mov_b32_e32 v2, v24 789; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 790; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 791; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6 792; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 793; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5 794; GFX9-O0-NEXT: v_mov_b32_e32 v4, v22 795; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 796; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 797; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11 798; GFX9-O0-NEXT: v_mov_b32_e32 v11, v14 799; GFX9-O0-NEXT: v_mov_b32_e32 v7, v15 800; GFX9-O0-NEXT: v_sub_co_u32_e32 v13, vcc, v13, v6 801; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 802; GFX9-O0-NEXT: v_subb_co_u32_e32 v12, vcc, v12, v10, vcc 803; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v11, v4, vcc 804; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v5, vcc 805; GFX9-O0-NEXT: ; implicit-def: $sgpr5 806; GFX9-O0-NEXT: ; implicit-def: $sgpr5 807; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec 808; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7 809; GFX9-O0-NEXT: v_ashrrev_i64 v[13:14], s4, v[11:12] 810; GFX9-O0-NEXT: v_mov_b32_e32 v7, v14 811; GFX9-O0-NEXT: s_mov_b64 s[4:5], 1 812; GFX9-O0-NEXT: s_mov_b32 s8, s5 813; GFX9-O0-NEXT: v_and_b32_e64 v12, v7, s8 814; GFX9-O0-NEXT: v_mov_b32_e32 v11, v13 815; GFX9-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 816; GFX9-O0-NEXT: v_and_b32_e64 v14, v11, s4 817; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec 818; GFX9-O0-NEXT: v_mov_b32_e32 v15, v12 819; GFX9-O0-NEXT: v_mov_b32_e32 v12, 0 820; GFX9-O0-NEXT: v_mov_b32_e32 v13, 0 821; GFX9-O0-NEXT: v_mov_b32_e32 v22, v21 822; GFX9-O0-NEXT: v_and_b32_e64 v22, v7, v22 823; GFX9-O0-NEXT: v_and_b32_e64 v20, v11, v20 824; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec 825; GFX9-O0-NEXT: v_mov_b32_e32 v21, v22 826; GFX9-O0-NEXT: v_mov_b32_e32 v22, v19 827; 
GFX9-O0-NEXT: v_and_b32_e64 v7, v7, v22 828; GFX9-O0-NEXT: v_and_b32_e64 v22, v11, v18 829; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec 830; GFX9-O0-NEXT: v_mov_b32_e32 v23, v7 831; GFX9-O0-NEXT: v_mov_b32_e32 v19, v22 832; GFX9-O0-NEXT: v_mov_b32_e32 v18, v23 833; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20 834; GFX9-O0-NEXT: v_mov_b32_e32 v7, v21 835; GFX9-O0-NEXT: v_sub_co_u32_e32 v6, vcc, v6, v19 836; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v18, vcc 837; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v4, v11, vcc 838; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v7, vcc 839; GFX9-O0-NEXT: ; implicit-def: $sgpr4 840; GFX9-O0-NEXT: ; implicit-def: $sgpr4 841; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 842; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 843; GFX9-O0-NEXT: ; implicit-def: $sgpr4 844; GFX9-O0-NEXT: ; implicit-def: $sgpr4 845; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec 846; GFX9-O0-NEXT: v_mov_b32_e32 v7, v10 847; GFX9-O0-NEXT: v_mov_b32_e32 v11, v8 848; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 killed $vgpr8_vgpr9 killed $exec 849; GFX9-O0-NEXT: s_mov_b64 s[8:9], -1 850; GFX9-O0-NEXT: s_mov_b32 s5, s8 851; GFX9-O0-NEXT: s_mov_b32 s4, s9 852; GFX9-O0-NEXT: v_mov_b32_e32 v10, v16 853; GFX9-O0-NEXT: v_mov_b32_e32 v8, v17 854; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5 855; GFX9-O0-NEXT: v_add_co_u32_e32 v19, vcc, v11, v16 856; GFX9-O0-NEXT: v_mov_b32_e32 v11, s4 857; GFX9-O0-NEXT: v_addc_co_u32_e32 v9, vcc, v9, v11, vcc 858; GFX9-O0-NEXT: v_mov_b32_e32 v11, s5 859; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v10, v11, vcc 860; GFX9-O0-NEXT: v_mov_b32_e32 v10, s4 861; GFX9-O0-NEXT: v_addc_co_u32_e32 v8, vcc, v8, v10, vcc 862; GFX9-O0-NEXT: ; implicit-def: $sgpr4 863; GFX9-O0-NEXT: ; implicit-def: $sgpr4 864; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec 865; GFX9-O0-NEXT: v_mov_b32_e32 v20, v9 866; GFX9-O0-NEXT: ; implicit-def: $sgpr4 
867; GFX9-O0-NEXT: ; implicit-def: $sgpr4 868; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec 869; GFX9-O0-NEXT: v_mov_b32_e32 v17, v8 870; GFX9-O0-NEXT: v_mov_b32_e32 v8, v16 871; GFX9-O0-NEXT: v_mov_b32_e32 v9, v17 872; GFX9-O0-NEXT: v_mov_b32_e32 v10, v19 873; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20 874; GFX9-O0-NEXT: v_mov_b32_e32 v21, v17 875; GFX9-O0-NEXT: v_mov_b32_e32 v18, v20 876; GFX9-O0-NEXT: v_or_b32_e64 v18, v18, v21 877; GFX9-O0-NEXT: v_mov_b32_e32 v17, v16 878; GFX9-O0-NEXT: v_mov_b32_e32 v16, v19 879; GFX9-O0-NEXT: v_or_b32_e64 v16, v16, v17 880; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec 881; GFX9-O0-NEXT: v_mov_b32_e32 v17, v18 882; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[16:17], v[12:13] 883; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] 884; GFX9-O0-NEXT: v_mov_b32_e32 v17, v3 885; GFX9-O0-NEXT: v_mov_b32_e32 v16, v2 886; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill 887; GFX9-O0-NEXT: s_nop 0 888; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill 889; GFX9-O0-NEXT: v_mov_b32_e32 v17, v1 890; GFX9-O0-NEXT: v_mov_b32_e32 v16, v0 891; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill 892; GFX9-O0-NEXT: s_nop 0 893; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill 894; GFX9-O0-NEXT: v_mov_b32_e32 v17, v15 895; GFX9-O0-NEXT: v_mov_b32_e32 v16, v14 896; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill 897; GFX9-O0-NEXT: s_nop 0 898; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill 899; GFX9-O0-NEXT: v_mov_b32_e32 v17, v13 900; GFX9-O0-NEXT: v_mov_b32_e32 v16, v12 901; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill 902; GFX9-O0-NEXT: s_nop 0 903; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:180 
; 4-byte Folded Spill 904; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5] 905; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 6 906; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 7 907; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5] 908; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 10 909; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 11 910; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 911; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill 912; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] 913; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill 914; GFX9-O0-NEXT: s_nop 0 915; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill 916; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill 917; GFX9-O0-NEXT: s_nop 0 918; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill 919; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill 920; GFX9-O0-NEXT: s_nop 0 921; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill 922; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill 923; GFX9-O0-NEXT: s_nop 0 924; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill 925; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill 926; GFX9-O0-NEXT: s_nop 0 927; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill 928; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill 929; GFX9-O0-NEXT: s_nop 0 930; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill 931; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill 932; GFX9-O0-NEXT: s_nop 0 933; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill 934; GFX9-O0-NEXT: buffer_store_dword v0, 
off, s[0:3], s32 offset:232 ; 4-byte Folded Spill 935; GFX9-O0-NEXT: s_nop 0 936; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill 937; GFX9-O0-NEXT: s_andn2_b64 exec, exec, s[4:5] 938; GFX9-O0-NEXT: s_cbranch_execnz .LBB0_6 939; GFX9-O0-NEXT: s_branch .LBB0_1 940; GFX9-O0-NEXT: .LBB0_7: ; %udiv-preheader 941; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 942; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload 943; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] 944; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload 945; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload 946; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload 947; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload 948; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload 949; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload 950; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload 951; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload 952; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload 953; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload 954; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload 955; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload 956; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload 957; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload 958; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload 959; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload 960; 
GFX9-O0-NEXT: s_waitcnt vmcnt(9) 961; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10 962; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 963; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], v4, v[20:21] 964; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 965; GFX9-O0-NEXT: s_mov_b32 s6, 64 966; GFX9-O0-NEXT: v_sub_u32_e64 v12, s6, v4 967; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], v12, v[18:19] 968; GFX9-O0-NEXT: v_mov_b32_e32 v12, v23 969; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v12 970; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec 971; GFX9-O0-NEXT: v_mov_b32_e32 v7, v22 972; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7 973; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec 974; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 975; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7 976; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v4, s6 977; GFX9-O0-NEXT: v_sub_u32_e64 v5, v4, s6 978; GFX9-O0-NEXT: v_lshrrev_b64 v[22:23], v5, v[18:19] 979; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 980; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[4:5] 981; GFX9-O0-NEXT: s_mov_b32 s6, 0 982; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v4, s6 983; GFX9-O0-NEXT: v_mov_b32_e32 v12, v21 984; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[6:7] 985; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 986; GFX9-O0-NEXT: v_mov_b32_e32 v6, v22 987; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5] 988; GFX9-O0-NEXT: v_mov_b32_e32 v7, v20 989; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[6:7] 990; GFX9-O0-NEXT: ; implicit-def: $sgpr6 991; GFX9-O0-NEXT: ; implicit-def: $sgpr6 992; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec 993; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 994; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], v4, v[18:19] 995; GFX9-O0-NEXT: v_mov_b32_e32 v15, v5 996; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 997; GFX9-O0-NEXT: s_mov_b32 s8, s7 998; GFX9-O0-NEXT: v_mov_b32_e32 v12, s8 999; GFX9-O0-NEXT: v_cndmask_b32_e64 v12, v12, v15, s[4:5] 1000; GFX9-O0-NEXT: v_mov_b32_e32 v5, v4 1001; GFX9-O0-NEXT: s_mov_b32 s8, s6 1002; 
GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 1003; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5] 1004; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1005; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1006; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 1007; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12 1008; GFX9-O0-NEXT: v_mov_b32_e32 v12, v13 1009; GFX9-O0-NEXT: v_mov_b32_e32 v15, v14 1010; GFX9-O0-NEXT: s_mov_b64 s[8:9], -1 1011; GFX9-O0-NEXT: s_mov_b32 s5, s8 1012; GFX9-O0-NEXT: s_mov_b32 s4, s9 1013; GFX9-O0-NEXT: v_mov_b32_e32 v14, v16 1014; GFX9-O0-NEXT: v_mov_b32_e32 v13, v17 1015; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5 1016; GFX9-O0-NEXT: v_add_co_u32_e32 v12, vcc, v12, v16 1017; GFX9-O0-NEXT: v_mov_b32_e32 v16, s4 1018; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v15, v16, vcc 1019; GFX9-O0-NEXT: v_mov_b32_e32 v15, s5 1020; GFX9-O0-NEXT: v_addc_co_u32_e32 v14, vcc, v14, v15, vcc 1021; GFX9-O0-NEXT: v_mov_b32_e32 v15, s4 1022; GFX9-O0-NEXT: v_addc_co_u32_e32 v13, vcc, v13, v15, vcc 1023; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1024; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1025; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec 1026; GFX9-O0-NEXT: v_mov_b32_e32 v15, v13 1027; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1028; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1029; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec 1030; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16 1031; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] 1032; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill 1033; GFX9-O0-NEXT: s_nop 0 1034; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill 1035; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill 1036; GFX9-O0-NEXT: s_nop 0 1037; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill 1038; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[6:7] 1039; GFX9-O0-NEXT: v_mov_b32_e32 v15, s9 
1040; GFX9-O0-NEXT: v_mov_b32_e32 v14, s8 1041; GFX9-O0-NEXT: v_mov_b32_e32 v13, s7 1042; GFX9-O0-NEXT: v_mov_b32_e32 v12, s6 1043; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 10 1044; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 11 1045; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 1046; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill 1047; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] 1048; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill 1049; GFX9-O0-NEXT: s_nop 0 1050; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill 1051; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill 1052; GFX9-O0-NEXT: s_nop 0 1053; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill 1054; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill 1055; GFX9-O0-NEXT: s_nop 0 1056; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill 1057; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill 1058; GFX9-O0-NEXT: s_nop 0 1059; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill 1060; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill 1061; GFX9-O0-NEXT: s_nop 0 1062; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill 1063; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill 1064; GFX9-O0-NEXT: s_nop 0 1065; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill 1066; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill 1067; GFX9-O0-NEXT: s_nop 0 1068; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill 1069; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill 
1070; GFX9-O0-NEXT: s_nop 0 1071; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill 1072; GFX9-O0-NEXT: s_branch .LBB0_6 1073; GFX9-O0-NEXT: .LBB0_8: ; %udiv-bb1 1074; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 1075; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload 1076; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] 1077; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload 1078; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload 1079; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload 1080; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload 1081; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload 1082; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload 1083; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload 1084; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload 1085; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1 1086; GFX9-O0-NEXT: s_mov_b32 s5, s6 1087; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 1088; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 1089; GFX9-O0-NEXT: s_mov_b32 s4, s7 1090; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 1091; GFX9-O0-NEXT: s_mov_b32 s8, s6 1092; GFX9-O0-NEXT: s_mov_b32 s9, s7 1093; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4 1094; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 1095; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5 1096; GFX9-O0-NEXT: v_add_co_u32_e32 v8, vcc, v3, v4 1097; GFX9-O0-NEXT: v_mov_b32_e32 v4, s4 1098; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 1099; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v4, vcc 1100; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 1101; GFX9-O0-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v4, vcc 1102; GFX9-O0-NEXT: v_mov_b32_e32 v4, s9 1103; GFX9-O0-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v4, vcc 1104; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1105; 
GFX9-O0-NEXT: ; implicit-def: $sgpr4 1106; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 1107; GFX9-O0-NEXT: v_mov_b32_e32 v9, v1 1108; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1109; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1110; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 1111; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 1112; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1 1113; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0 1114; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill 1115; GFX9-O0-NEXT: s_nop 0 1116; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill 1117; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 1118; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 1119; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill 1120; GFX9-O0-NEXT: s_nop 0 1121; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill 1122; GFX9-O0-NEXT: s_mov_b32 s4, 0x7f 1123; GFX9-O0-NEXT: v_sub_u32_e64 v2, s4, v3 1124; GFX9-O0-NEXT: v_lshlrev_b64 v[4:5], v2, v[10:11] 1125; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5 1126; GFX9-O0-NEXT: s_mov_b32 s4, 64 1127; GFX9-O0-NEXT: v_sub_u32_e64 v13, s4, v2 1128; GFX9-O0-NEXT: v_lshrrev_b64 v[13:14], v13, v[6:7] 1129; GFX9-O0-NEXT: v_mov_b32_e32 v15, v14 1130; GFX9-O0-NEXT: v_or_b32_e64 v12, v12, v15 1131; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec 1132; GFX9-O0-NEXT: v_mov_b32_e32 v5, v13 1133; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 1134; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 1135; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12 1136; GFX9-O0-NEXT: v_mov_b32_e32 v14, v5 1137; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v2, s4 1138; GFX9-O0-NEXT: s_mov_b32 s10, 63 1139; GFX9-O0-NEXT: v_sub_u32_e64 v3, s10, v3 1140; GFX9-O0-NEXT: v_lshlrev_b64 v[12:13], v3, v[6:7] 1141; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13 1142; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v14, 
s[4:5] 1143; GFX9-O0-NEXT: s_mov_b32 s10, 0 1144; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[10:11], v2, s10 1145; GFX9-O0-NEXT: v_mov_b32_e32 v14, v11 1146; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[10:11] 1147; GFX9-O0-NEXT: v_mov_b32_e32 v5, v4 1148; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12 1149; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5] 1150; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 1151; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[10:11] 1152; GFX9-O0-NEXT: ; implicit-def: $sgpr10 1153; GFX9-O0-NEXT: ; implicit-def: $sgpr10 1154; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 1155; GFX9-O0-NEXT: v_mov_b32_e32 v5, v3 1156; GFX9-O0-NEXT: v_lshlrev_b64 v[6:7], v2, v[6:7] 1157; GFX9-O0-NEXT: v_mov_b32_e32 v3, v7 1158; GFX9-O0-NEXT: v_mov_b32_e32 v2, s9 1159; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5] 1160; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec 1161; GFX9-O0-NEXT: v_mov_b32_e32 v3, s8 1162; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v3, v6, s[4:5] 1163; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1164; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1165; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec 1166; GFX9-O0-NEXT: v_mov_b32_e32 v7, v2 1167; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill 1168; GFX9-O0-NEXT: s_nop 0 1169; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill 1170; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill 1171; GFX9-O0-NEXT: s_nop 0 1172; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill 1173; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 1174; GFX9-O0-NEXT: v_mov_b32_e32 v2, v9 1175; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 1176; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 1177; GFX9-O0-NEXT: v_mov_b32_e32 v0, v8 1178; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v1 1179; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed 
$exec 1180; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 1181; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[0:1], s[6:7] 1182; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] 1183; GFX9-O0-NEXT: v_mov_b32_e32 v2, s8 1184; GFX9-O0-NEXT: v_mov_b32_e32 v3, s9 1185; GFX9-O0-NEXT: v_mov_b32_e32 v0, s6 1186; GFX9-O0-NEXT: v_mov_b32_e32 v1, s7 1187; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill 1188; GFX9-O0-NEXT: s_nop 0 1189; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill 1190; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill 1191; GFX9-O0-NEXT: s_nop 0 1192; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill 1193; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill 1194; GFX9-O0-NEXT: s_nop 0 1195; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill 1196; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill 1197; GFX9-O0-NEXT: s_nop 0 1198; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill 1199; GFX9-O0-NEXT: s_mov_b64 s[6:7], exec 1200; GFX9-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] 1201; GFX9-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] 1202; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 8 1203; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 9 1204; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 1205; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill 1206; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] 1207; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] 1208; GFX9-O0-NEXT: s_cbranch_execz .LBB0_5 1209; GFX9-O0-NEXT: s_branch .LBB0_7 1210; GFX9-O0-NEXT: .LBB0_9: ; %udiv-end 1211; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload 1212; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload 1213; GFX9-O0-NEXT: buffer_load_dword v6, off, 
s[0:3], s32 offset:100 ; 4-byte Folded Reload 1214; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload 1215; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload 1216; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload 1217; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload 1218; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload 1219; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload 1220; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload 1221; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload 1222; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload 1223; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload 1224; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload 1225; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload 1226; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload 1227; GFX9-O0-NEXT: s_mov_b32 s4, 32 1228; GFX9-O0-NEXT: s_waitcnt vmcnt(2) 1229; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[16:17] 1230; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 1231; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 1232; GFX9-O0-NEXT: v_mov_b32_e32 v0, v20 1233; GFX9-O0-NEXT: v_mul_lo_u32 v8, v1, v0 1234; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 1235; GFX9-O0-NEXT: v_lshrrev_b64 v[20:21], s4, v[20:21] 1236; GFX9-O0-NEXT: v_mov_b32_e32 v5, v20 1237; GFX9-O0-NEXT: v_mov_b32_e32 v2, v16 1238; GFX9-O0-NEXT: v_mul_lo_u32 v5, v2, v5 1239; GFX9-O0-NEXT: v_mad_u64_u32 v[16:17], s[6:7], v2, v0, 0 1240; GFX9-O0-NEXT: v_mov_b32_e32 v0, v17 1241; GFX9-O0-NEXT: v_add3_u32 v8, v0, v5, v8 1242; GFX9-O0-NEXT: ; implicit-def: $sgpr5 1243; GFX9-O0-NEXT: ; 
implicit-def: $sgpr6 1244; GFX9-O0-NEXT: ; implicit-def: $sgpr6 1245; GFX9-O0-NEXT: v_mov_b32_e32 v0, s5 1246; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 1247; GFX9-O0-NEXT: v_mov_b32_e32 v9, v0 1248; GFX9-O0-NEXT: v_lshlrev_b64 v[8:9], s4, v[8:9] 1249; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 1250; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 killed $vgpr16_vgpr17 killed $exec 1251; GFX9-O0-NEXT: s_mov_b32 s5, 0 1252; GFX9-O0-NEXT: ; implicit-def: $sgpr6 1253; GFX9-O0-NEXT: v_mov_b32_e32 v0, s5 1254; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec 1255; GFX9-O0-NEXT: v_mov_b32_e32 v17, v0 1256; GFX9-O0-NEXT: v_mov_b32_e32 v0, v17 1257; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v5 1258; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 killed $vgpr8_vgpr9 killed $exec 1259; GFX9-O0-NEXT: v_mov_b32_e32 v5, v16 1260; GFX9-O0-NEXT: v_or_b32_e64 v16, v5, v8 1261; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec 1262; GFX9-O0-NEXT: v_mov_b32_e32 v17, v0 1263; GFX9-O0-NEXT: v_lshrrev_b64 v[8:9], s4, v[18:19] 1264; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8 1265; GFX9-O0-NEXT: v_mov_b32_e32 v8, v14 1266; GFX9-O0-NEXT: v_mul_lo_u32 v9, v8, v5 1267; GFX9-O0-NEXT: v_lshrrev_b64 v[14:15], s4, v[14:15] 1268; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec 1269; GFX9-O0-NEXT: v_mov_b32_e32 v0, v18 1270; GFX9-O0-NEXT: v_mul_lo_u32 v14, v14, v0 1271; GFX9-O0-NEXT: v_mad_u64_u32 v[18:19], s[6:7], v8, v0, 0 1272; GFX9-O0-NEXT: v_mov_b32_e32 v8, v19 1273; GFX9-O0-NEXT: v_add3_u32 v8, v8, v9, v14 1274; GFX9-O0-NEXT: ; implicit-def: $sgpr6 1275; GFX9-O0-NEXT: ; implicit-def: $sgpr7 1276; GFX9-O0-NEXT: ; implicit-def: $sgpr7 1277; GFX9-O0-NEXT: v_mov_b32_e32 v14, s6 1278; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 1279; GFX9-O0-NEXT: v_mov_b32_e32 v9, v14 1280; GFX9-O0-NEXT: v_lshlrev_b64 v[8:9], s4, v[8:9] 1281; GFX9-O0-NEXT: 
v_mov_b32_e32 v15, v9 1282; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 killed $vgpr18_vgpr19 killed $exec 1283; GFX9-O0-NEXT: ; implicit-def: $sgpr6 1284; GFX9-O0-NEXT: v_mov_b32_e32 v14, s5 1285; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec 1286; GFX9-O0-NEXT: v_mov_b32_e32 v19, v14 1287; GFX9-O0-NEXT: v_mov_b32_e32 v14, v19 1288; GFX9-O0-NEXT: v_or_b32_e64 v14, v14, v15 1289; GFX9-O0-NEXT: v_mov_b32_e32 v9, v8 1290; GFX9-O0-NEXT: v_mov_b32_e32 v8, v18 1291; GFX9-O0-NEXT: v_or_b32_e64 v8, v8, v9 1292; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 1293; GFX9-O0-NEXT: v_mov_b32_e32 v9, v14 1294; GFX9-O0-NEXT: v_mov_b32_e32 v14, v8 1295; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16 1296; GFX9-O0-NEXT: v_mov_b32_e32 v8, v9 1297; GFX9-O0-NEXT: v_mov_b32_e32 v9, v17 1298; GFX9-O0-NEXT: v_add_co_u32_e64 v16, s[6:7], v14, v15 1299; GFX9-O0-NEXT: v_addc_co_u32_e64 v8, s[6:7], v8, v9, s[6:7] 1300; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec 1301; GFX9-O0-NEXT: v_mov_b32_e32 v17, v8 1302; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v5, v1, 0 1303; GFX9-O0-NEXT: v_mov_b32_e32 v18, v14 1304; GFX9-O0-NEXT: ; implicit-def: $sgpr6 1305; GFX9-O0-NEXT: v_mov_b32_e32 v8, s5 1306; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec 1307; GFX9-O0-NEXT: v_mov_b32_e32 v19, v8 1308; GFX9-O0-NEXT: v_mov_b32_e32 v8, v19 1309; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15 1310; GFX9-O0-NEXT: ; implicit-def: $sgpr6 1311; GFX9-O0-NEXT: ; implicit-def: $sgpr7 1312; GFX9-O0-NEXT: ; implicit-def: $sgpr7 1313; GFX9-O0-NEXT: v_mov_b32_e32 v9, s6 1314; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec 1315; GFX9-O0-NEXT: v_mov_b32_e32 v15, v9 1316; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15] 1317; GFX9-O0-NEXT: v_mov_b32_e32 v9, v15 1318; GFX9-O0-NEXT: v_or_b32_e64 v8, v8, v9 1319; GFX9-O0-NEXT: v_mov_b32_e32 v9, v18 1320; GFX9-O0-NEXT: ; 
kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec 1321; GFX9-O0-NEXT: v_or_b32_e64 v20, v9, v14 1322; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec 1323; GFX9-O0-NEXT: v_mov_b32_e32 v21, v8 1324; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v5, v2, 0 1325; GFX9-O0-NEXT: v_mov_b32_e32 v8, v14 1326; GFX9-O0-NEXT: ; implicit-def: $sgpr6 1327; GFX9-O0-NEXT: v_mov_b32_e32 v5, s5 1328; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 1329; GFX9-O0-NEXT: v_mov_b32_e32 v9, v5 1330; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 1331; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15 1332; GFX9-O0-NEXT: ; implicit-def: $sgpr6 1333; GFX9-O0-NEXT: ; implicit-def: $sgpr7 1334; GFX9-O0-NEXT: ; implicit-def: $sgpr7 1335; GFX9-O0-NEXT: v_mov_b32_e32 v18, s6 1336; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec 1337; GFX9-O0-NEXT: v_mov_b32_e32 v15, v18 1338; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15] 1339; GFX9-O0-NEXT: v_mov_b32_e32 v18, v15 1340; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v18 1341; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 killed $vgpr8_vgpr9 killed $exec 1342; GFX9-O0-NEXT: v_mov_b32_e32 v9, v14 1343; GFX9-O0-NEXT: v_or_b32_e64 v22, v8, v9 1344; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec 1345; GFX9-O0-NEXT: v_mov_b32_e32 v23, v5 1346; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v0, v2, 0 1347; GFX9-O0-NEXT: v_mov_b32_e32 v18, v15 1348; GFX9-O0-NEXT: ; implicit-def: $sgpr6 1349; GFX9-O0-NEXT: v_mov_b32_e32 v2, s5 1350; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec 1351; GFX9-O0-NEXT: v_mov_b32_e32 v19, v2 1352; GFX9-O0-NEXT: v_mov_b32_e32 v8, v22 1353; GFX9-O0-NEXT: v_mov_b32_e32 v9, v18 1354; GFX9-O0-NEXT: v_mov_b32_e32 v2, v23 1355; GFX9-O0-NEXT: v_mov_b32_e32 v5, v19 1356; GFX9-O0-NEXT: v_add_co_u32_e64 v8, s[6:7], v8, v9 1357; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v2, v5, s[6:7] 1358; 
GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 1359; GFX9-O0-NEXT: v_mov_b32_e32 v9, v2 1360; GFX9-O0-NEXT: v_mov_b32_e32 v2, v9 1361; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0xffffffff 1362; GFX9-O0-NEXT: s_mov_b32 s8, s7 1363; GFX9-O0-NEXT: v_and_b32_e64 v2, v2, s8 1364; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8 1365; GFX9-O0-NEXT: ; kill: def $sgpr6 killed $sgpr6 killed $sgpr6_sgpr7 1366; GFX9-O0-NEXT: v_and_b32_e64 v18, v5, s6 1367; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec 1368; GFX9-O0-NEXT: v_mov_b32_e32 v19, v2 1369; GFX9-O0-NEXT: v_mad_u64_u32 v[22:23], s[6:7], v0, v1, 0 1370; GFX9-O0-NEXT: v_mov_b32_e32 v1, v22 1371; GFX9-O0-NEXT: ; implicit-def: $sgpr6 1372; GFX9-O0-NEXT: v_mov_b32_e32 v0, s5 1373; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 1374; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 1375; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2 1376; GFX9-O0-NEXT: v_mov_b32_e32 v22, v23 1377; GFX9-O0-NEXT: ; implicit-def: $sgpr6 1378; GFX9-O0-NEXT: ; implicit-def: $sgpr7 1379; GFX9-O0-NEXT: ; implicit-def: $sgpr7 1380; GFX9-O0-NEXT: v_mov_b32_e32 v5, s6 1381; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec 1382; GFX9-O0-NEXT: v_mov_b32_e32 v23, v5 1383; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], s4, v[22:23] 1384; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 1385; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v5 1386; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr1_vgpr2 killed $exec 1387; GFX9-O0-NEXT: v_mov_b32_e32 v2, v22 1388; GFX9-O0-NEXT: v_or_b32_e64 v1, v1, v2 1389; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec 1390; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 1391; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 1392; GFX9-O0-NEXT: v_mov_b32_e32 v5, v18 1393; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 1394; GFX9-O0-NEXT: v_mov_b32_e32 v2, v19 1395; GFX9-O0-NEXT: v_add_co_u32_e64 v0, s[6:7], v0, v5 1396; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v1, v2, s[6:7] 
1397; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 1398; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 1399; GFX9-O0-NEXT: v_lshrrev_b64 v[18:19], s4, v[0:1] 1400; GFX9-O0-NEXT: v_lshrrev_b64 v[22:23], s4, v[8:9] 1401; GFX9-O0-NEXT: v_mov_b32_e32 v8, v22 1402; GFX9-O0-NEXT: v_mov_b32_e32 v9, v18 1403; GFX9-O0-NEXT: v_mov_b32_e32 v2, v23 1404; GFX9-O0-NEXT: v_mov_b32_e32 v5, v19 1405; GFX9-O0-NEXT: v_add_co_u32_e64 v18, s[6:7], v8, v9 1406; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v2, v5, s[6:7] 1407; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec 1408; GFX9-O0-NEXT: v_mov_b32_e32 v19, v2 1409; GFX9-O0-NEXT: v_mov_b32_e32 v8, v20 1410; GFX9-O0-NEXT: v_mov_b32_e32 v9, v18 1411; GFX9-O0-NEXT: v_mov_b32_e32 v2, v21 1412; GFX9-O0-NEXT: v_mov_b32_e32 v5, v19 1413; GFX9-O0-NEXT: v_add_co_u32_e64 v18, s[6:7], v8, v9 1414; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v2, v5, s[6:7] 1415; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec 1416; GFX9-O0-NEXT: v_mov_b32_e32 v19, v2 1417; GFX9-O0-NEXT: v_mov_b32_e32 v8, v18 1418; GFX9-O0-NEXT: v_mov_b32_e32 v9, v16 1419; GFX9-O0-NEXT: v_mov_b32_e32 v2, v19 1420; GFX9-O0-NEXT: v_mov_b32_e32 v5, v17 1421; GFX9-O0-NEXT: v_add_co_u32_e64 v8, s[6:7], v8, v9 1422; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v2, v5, s[6:7] 1423; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 1424; GFX9-O0-NEXT: v_mov_b32_e32 v9, v2 1425; GFX9-O0-NEXT: v_lshlrev_b64 v[0:1], s4, v[0:1] 1426; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1 1427; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec 1428; GFX9-O0-NEXT: ; implicit-def: $sgpr6 1429; GFX9-O0-NEXT: v_mov_b32_e32 v2, s5 1430; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec 1431; GFX9-O0-NEXT: v_mov_b32_e32 v15, v2 1432; GFX9-O0-NEXT: v_mov_b32_e32 v2, v15 1433; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v5 1434; GFX9-O0-NEXT: 
v_mov_b32_e32 v1, v0 1435; GFX9-O0-NEXT: v_mov_b32_e32 v0, v14 1436; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v1 1437; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 1438; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 1439; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8 1440; GFX9-O0-NEXT: v_mov_b32_e32 v2, v9 1441; GFX9-O0-NEXT: v_mov_b32_e32 v9, v0 1442; GFX9-O0-NEXT: v_mov_b32_e32 v8, v1 1443; GFX9-O0-NEXT: v_mov_b32_e32 v0, v3 1444; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4 1445; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12 1446; GFX9-O0-NEXT: v_mov_b32_e32 v1, v13 1447; GFX9-O0-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v9 1448; GFX9-O0-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v8, vcc 1449; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v4, v5, vcc 1450; GFX9-O0-NEXT: v_subb_co_u32_e32 v2, vcc, v1, v2, vcc 1451; GFX9-O0-NEXT: ; implicit-def: $sgpr5 1452; GFX9-O0-NEXT: ; implicit-def: $sgpr5 1453; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 1454; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 1455; GFX9-O0-NEXT: ; implicit-def: $sgpr5 1456; GFX9-O0-NEXT: ; implicit-def: $sgpr5 1457; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 1458; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2 1459; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5 1460; GFX9-O0-NEXT: v_mov_b32_e32 v2, v7 1461; GFX9-O0-NEXT: v_xor_b32_e64 v3, v3, v2 1462; GFX9-O0-NEXT: v_mov_b32_e32 v5, v4 1463; GFX9-O0-NEXT: v_mov_b32_e32 v4, v6 1464; GFX9-O0-NEXT: v_xor_b32_e64 v8, v5, v4 1465; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 1466; GFX9-O0-NEXT: v_mov_b32_e32 v9, v3 1467; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 1468; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11 1469; GFX9-O0-NEXT: v_xor_b32_e64 v3, v3, v6 1470; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec 1471; GFX9-O0-NEXT: v_mov_b32_e32 v7, v10 1472; GFX9-O0-NEXT: v_xor_b32_e64 v0, v0, v7 1473; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 1474; GFX9-O0-NEXT: 
v_mov_b32_e32 v1, v3 1475; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0 1476; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 1477; GFX9-O0-NEXT: v_mov_b32_e32 v3, v8 1478; GFX9-O0-NEXT: v_mov_b32_e32 v1, v9 1479; GFX9-O0-NEXT: v_sub_co_u32_e32 v5, vcc, v5, v7 1480; GFX9-O0-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v6, vcc 1481; GFX9-O0-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v4, vcc 1482; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc 1483; GFX9-O0-NEXT: ; implicit-def: $sgpr5 1484; GFX9-O0-NEXT: ; implicit-def: $sgpr5 1485; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec 1486; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 1487; GFX9-O0-NEXT: ; implicit-def: $sgpr5 1488; GFX9-O0-NEXT: ; implicit-def: $sgpr5 1489; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec 1490; GFX9-O0-NEXT: v_mov_b32_e32 v6, v0 1491; GFX9-O0-NEXT: v_mov_b32_e32 v0, v5 1492; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 1493; GFX9-O0-NEXT: v_lshrrev_b64 v[5:6], s4, v[5:6] 1494; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5 1495; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4], s4, v[3:4] 1496; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec 1497; GFX9-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 1498; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload 1499; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] 1500; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 1501; GFX9-O0-NEXT: s_setpc_b64 s[30:31] 1502 %div = srem i128 %lhs, %rhs 1503 ret i128 %div 1504} 1505 1506define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { 1507; GFX9-LABEL: v_urem_i128_vv: 1508; GFX9: ; %bb.0: ; %_udiv-special-cases 1509; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1510; GFX9-NEXT: v_or_b32_e32 v9, v5, v7 1511; GFX9-NEXT: v_or_b32_e32 v8, v4, v6 1512; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] 1513; GFX9-NEXT: v_or_b32_e32 v9, v1, v3 1514; GFX9-NEXT: v_or_b32_e32 v8, v0, v2 1515; GFX9-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[8:9] 1516; GFX9-NEXT: v_ffbh_u32_e32 v8, v6 1517; GFX9-NEXT: 
v_add_u32_e32 v8, 32, v8 1518; GFX9-NEXT: v_ffbh_u32_e32 v9, v7 1519; GFX9-NEXT: v_min_u32_e32 v8, v8, v9 1520; GFX9-NEXT: v_ffbh_u32_e32 v9, v4 1521; GFX9-NEXT: v_add_u32_e32 v9, 32, v9 1522; GFX9-NEXT: v_ffbh_u32_e32 v10, v5 1523; GFX9-NEXT: v_min_u32_e32 v9, v9, v10 1524; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1525; GFX9-NEXT: v_add_co_u32_e32 v9, vcc, 64, v9 1526; GFX9-NEXT: v_addc_co_u32_e64 v10, s[6:7], 0, 0, vcc 1527; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] 1528; GFX9-NEXT: v_ffbh_u32_e32 v11, v3 1529; GFX9-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc 1530; GFX9-NEXT: v_ffbh_u32_e32 v9, v2 1531; GFX9-NEXT: v_add_u32_e32 v9, 32, v9 1532; GFX9-NEXT: v_min_u32_e32 v9, v9, v11 1533; GFX9-NEXT: v_ffbh_u32_e32 v11, v0 1534; GFX9-NEXT: v_add_u32_e32 v11, 32, v11 1535; GFX9-NEXT: v_ffbh_u32_e32 v12, v1 1536; GFX9-NEXT: v_min_u32_e32 v11, v11, v12 1537; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, 0, vcc 1538; GFX9-NEXT: v_add_co_u32_e32 v11, vcc, 64, v11 1539; GFX9-NEXT: v_addc_co_u32_e64 v12, s[6:7], 0, 0, vcc 1540; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] 1541; GFX9-NEXT: s_mov_b64 s[6:7], 0x7f 1542; GFX9-NEXT: v_cndmask_b32_e32 v9, v11, v9, vcc 1543; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, 0, vcc 1544; GFX9-NEXT: v_sub_co_u32_e32 v8, vcc, v8, v9 1545; GFX9-NEXT: v_subb_co_u32_e32 v9, vcc, v10, v12, vcc 1546; GFX9-NEXT: v_mov_b32_e32 v11, 0 1547; GFX9-NEXT: v_subbrev_co_u32_e32 v10, vcc, 0, v11, vcc 1548; GFX9-NEXT: v_subbrev_co_u32_e32 v11, vcc, 0, v11, vcc 1549; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9] 1550; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1551; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] 1552; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1553; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] 1554; GFX9-NEXT: v_cndmask_b32_e32 v12, v13, v12, vcc 1555; GFX9-NEXT: v_and_b32_e32 v12, 1, v12 1556; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v12 1557; GFX9-NEXT: v_xor_b32_e32 v12, 0x7f, v8 1558; GFX9-NEXT: v_or_b32_e32 v13, v9, v11 1559; GFX9-NEXT: v_or_b32_e32 v12, 
v12, v10 1560; GFX9-NEXT: s_or_b64 s[4:5], s[4:5], vcc 1561; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[12:13] 1562; GFX9-NEXT: s_xor_b64 s[6:7], s[4:5], -1 1563; GFX9-NEXT: v_cndmask_b32_e64 v15, v3, 0, s[4:5] 1564; GFX9-NEXT: v_cndmask_b32_e64 v14, v2, 0, s[4:5] 1565; GFX9-NEXT: v_cndmask_b32_e64 v13, v1, 0, s[4:5] 1566; GFX9-NEXT: v_cndmask_b32_e64 v12, v0, 0, s[4:5] 1567; GFX9-NEXT: s_and_b64 s[4:5], s[6:7], vcc 1568; GFX9-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] 1569; GFX9-NEXT: s_cbranch_execz .LBB1_6 1570; GFX9-NEXT: ; %bb.1: ; %udiv-bb1 1571; GFX9-NEXT: v_add_co_u32_e32 v22, vcc, 1, v8 1572; GFX9-NEXT: v_addc_co_u32_e32 v23, vcc, 0, v9, vcc 1573; GFX9-NEXT: v_addc_co_u32_e32 v24, vcc, 0, v10, vcc 1574; GFX9-NEXT: v_sub_u32_e32 v15, 0x7f, v8 1575; GFX9-NEXT: v_addc_co_u32_e32 v25, vcc, 0, v11, vcc 1576; GFX9-NEXT: v_sub_u32_e32 v13, 64, v15 1577; GFX9-NEXT: v_or_b32_e32 v10, v23, v25 1578; GFX9-NEXT: v_or_b32_e32 v9, v22, v24 1579; GFX9-NEXT: v_lshlrev_b64 v[11:12], v15, v[2:3] 1580; GFX9-NEXT: v_lshrrev_b64 v[13:14], v13, v[0:1] 1581; GFX9-NEXT: v_sub_u32_e32 v8, 63, v8 1582; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[9:10] 1583; GFX9-NEXT: v_lshlrev_b64 v[8:9], v8, v[0:1] 1584; GFX9-NEXT: v_or_b32_e32 v10, v12, v14 1585; GFX9-NEXT: v_or_b32_e32 v11, v11, v13 1586; GFX9-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v15 1587; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v10, s[4:5] 1588; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, v11, s[4:5] 1589; GFX9-NEXT: v_lshlrev_b64 v[10:11], v15, v[0:1] 1590; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v15 1591; GFX9-NEXT: v_mov_b32_e32 v12, 0 1592; GFX9-NEXT: v_mov_b32_e32 v14, 0 1593; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v3, s[6:7] 1594; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, v2, s[6:7] 1595; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, v11, s[4:5] 1596; GFX9-NEXT: v_mov_b32_e32 v13, 0 1597; GFX9-NEXT: v_mov_b32_e32 v15, 0 1598; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, v10, s[4:5] 1599; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc 1600; GFX9-NEXT: s_xor_b64 s[6:7], exec, 
s[4:5] 1601; GFX9-NEXT: s_cbranch_execz .LBB1_5 1602; GFX9-NEXT: ; %bb.2: ; %udiv-preheader 1603; GFX9-NEXT: v_sub_u32_e32 v14, 64, v22 1604; GFX9-NEXT: v_lshrrev_b64 v[12:13], v22, v[0:1] 1605; GFX9-NEXT: v_lshlrev_b64 v[14:15], v14, v[2:3] 1606; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v22 1607; GFX9-NEXT: v_or_b32_e32 v14, v12, v14 1608; GFX9-NEXT: v_subrev_u32_e32 v12, 64, v22 1609; GFX9-NEXT: v_or_b32_e32 v15, v13, v15 1610; GFX9-NEXT: v_lshrrev_b64 v[12:13], v12, v[2:3] 1611; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v22 1612; GFX9-NEXT: v_cndmask_b32_e32 v13, v13, v15, vcc 1613; GFX9-NEXT: v_cndmask_b32_e64 v17, v13, v1, s[4:5] 1614; GFX9-NEXT: v_cndmask_b32_e32 v14, v12, v14, vcc 1615; GFX9-NEXT: v_lshrrev_b64 v[12:13], v22, v[2:3] 1616; GFX9-NEXT: v_cndmask_b32_e64 v16, v14, v0, s[4:5] 1617; GFX9-NEXT: v_cndmask_b32_e32 v19, 0, v13, vcc 1618; GFX9-NEXT: v_cndmask_b32_e32 v18, 0, v12, vcc 1619; GFX9-NEXT: v_add_co_u32_e32 v26, vcc, -1, v4 1620; GFX9-NEXT: v_addc_co_u32_e32 v27, vcc, -1, v5, vcc 1621; GFX9-NEXT: v_addc_co_u32_e32 v28, vcc, -1, v6, vcc 1622; GFX9-NEXT: v_mov_b32_e32 v20, 0 1623; GFX9-NEXT: v_mov_b32_e32 v14, 0 1624; GFX9-NEXT: v_addc_co_u32_e32 v29, vcc, -1, v7, vcc 1625; GFX9-NEXT: s_mov_b64 s[4:5], 0 1626; GFX9-NEXT: v_mov_b32_e32 v21, 0 1627; GFX9-NEXT: v_mov_b32_e32 v15, 0 1628; GFX9-NEXT: v_mov_b32_e32 v13, 0 1629; GFX9-NEXT: .LBB1_3: ; %udiv-do-while 1630; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 1631; GFX9-NEXT: v_lshlrev_b64 v[30:31], 1, v[10:11] 1632; GFX9-NEXT: v_lshrrev_b32_e32 v12, 31, v11 1633; GFX9-NEXT: v_or_b32_e32 v10, v20, v30 1634; GFX9-NEXT: v_lshrrev_b32_e32 v20, 31, v17 1635; GFX9-NEXT: v_lshlrev_b64 v[16:17], 1, v[16:17] 1636; GFX9-NEXT: v_or_b32_e32 v11, v21, v31 1637; GFX9-NEXT: v_lshlrev_b64 v[18:19], 1, v[18:19] 1638; GFX9-NEXT: v_lshrrev_b32_e32 v21, 31, v9 1639; GFX9-NEXT: v_or_b32_e32 v16, v16, v21 1640; GFX9-NEXT: v_or_b32_e32 v18, v18, v20 1641; GFX9-NEXT: v_sub_co_u32_e32 v20, vcc, v26, v16 1642; GFX9-NEXT: 
v_subb_co_u32_e32 v20, vcc, v27, v17, vcc 1643; GFX9-NEXT: v_subb_co_u32_e32 v20, vcc, v28, v18, vcc 1644; GFX9-NEXT: v_subb_co_u32_e32 v20, vcc, v29, v19, vcc 1645; GFX9-NEXT: v_ashrrev_i32_e32 v30, 31, v20 1646; GFX9-NEXT: v_and_b32_e32 v20, v30, v4 1647; GFX9-NEXT: v_lshlrev_b64 v[8:9], 1, v[8:9] 1648; GFX9-NEXT: v_sub_co_u32_e32 v16, vcc, v16, v20 1649; GFX9-NEXT: v_and_b32_e32 v20, v30, v5 1650; GFX9-NEXT: v_subb_co_u32_e32 v17, vcc, v17, v20, vcc 1651; GFX9-NEXT: v_or3_b32 v8, v8, v12, v14 1652; GFX9-NEXT: v_and_b32_e32 v12, v30, v6 1653; GFX9-NEXT: v_and_b32_e32 v20, v30, v7 1654; GFX9-NEXT: v_subb_co_u32_e32 v18, vcc, v18, v12, vcc 1655; GFX9-NEXT: v_subb_co_u32_e32 v19, vcc, v19, v20, vcc 1656; GFX9-NEXT: v_add_co_u32_e32 v22, vcc, -1, v22 1657; GFX9-NEXT: v_addc_co_u32_e32 v23, vcc, -1, v23, vcc 1658; GFX9-NEXT: v_addc_co_u32_e32 v24, vcc, -1, v24, vcc 1659; GFX9-NEXT: v_addc_co_u32_e32 v25, vcc, -1, v25, vcc 1660; GFX9-NEXT: v_or_b32_e32 v20, v22, v24 1661; GFX9-NEXT: v_or_b32_e32 v21, v23, v25 1662; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[20:21] 1663; GFX9-NEXT: v_and_b32_e32 v12, 1, v30 1664; GFX9-NEXT: v_mov_b32_e32 v21, v13 1665; GFX9-NEXT: v_or3_b32 v9, v9, 0, v15 1666; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1667; GFX9-NEXT: v_mov_b32_e32 v20, v12 1668; GFX9-NEXT: s_andn2_b64 exec, exec, s[4:5] 1669; GFX9-NEXT: s_cbranch_execnz .LBB1_3 1670; GFX9-NEXT: ; %bb.4: ; %Flow 1671; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 1672; GFX9-NEXT: .LBB1_5: ; %Flow2 1673; GFX9-NEXT: s_or_b64 exec, exec, s[6:7] 1674; GFX9-NEXT: v_lshlrev_b64 v[16:17], 1, v[10:11] 1675; GFX9-NEXT: v_lshlrev_b64 v[8:9], 1, v[8:9] 1676; GFX9-NEXT: v_lshrrev_b32_e32 v10, 31, v11 1677; GFX9-NEXT: v_or3_b32 v15, v9, 0, v15 1678; GFX9-NEXT: v_or3_b32 v14, v8, v10, v14 1679; GFX9-NEXT: v_or_b32_e32 v13, v13, v17 1680; GFX9-NEXT: v_or_b32_e32 v12, v12, v16 1681; GFX9-NEXT: .LBB1_6: ; %Flow3 1682; GFX9-NEXT: s_or_b64 exec, exec, s[8:9] 1683; GFX9-NEXT: v_mul_lo_u32 v19, v12, v7 1684; GFX9-NEXT: 
v_mad_u64_u32 v[7:8], s[4:5], v4, v12, 0 1685; GFX9-NEXT: v_mov_b32_e32 v17, 0 1686; GFX9-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v12, v6, 0 1687; GFX9-NEXT: v_mov_b32_e32 v16, v8 1688; GFX9-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v5, v12, v[16:17] 1689; GFX9-NEXT: v_mul_lo_u32 v18, v13, v6 1690; GFX9-NEXT: v_mul_lo_u32 v16, v15, v4 1691; GFX9-NEXT: v_mov_b32_e32 v6, v12 1692; GFX9-NEXT: v_mov_b32_e32 v12, v17 1693; GFX9-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v4, v13, v[11:12] 1694; GFX9-NEXT: v_add3_u32 v10, v10, v19, v18 1695; GFX9-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v14, v4, v[9:10] 1696; GFX9-NEXT: v_mov_b32_e32 v4, v12 1697; GFX9-NEXT: v_mul_lo_u32 v10, v14, v5 1698; GFX9-NEXT: v_add_co_u32_e32 v14, vcc, v6, v4 1699; GFX9-NEXT: v_addc_co_u32_e64 v15, s[4:5], 0, 0, vcc 1700; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v13, v[14:15] 1701; GFX9-NEXT: v_add3_u32 v6, v16, v9, v10 1702; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v8 1703; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v5, v6, vcc 1704; GFX9-NEXT: v_mov_b32_e32 v6, v11 1705; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v7 1706; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v6, vcc 1707; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v4, vcc 1708; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v5, vcc 1709; GFX9-NEXT: s_setpc_b64 s[30:31] 1710; 1711; GFX9-O0-LABEL: v_urem_i128_vv: 1712; GFX9-O0: ; %bb.0: ; %_udiv-special-cases 1713; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1714; GFX9-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 1715; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill 1716; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] 1717; GFX9-O0-NEXT: v_mov_b32_e32 v10, v6 1718; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill 1719; GFX9-O0-NEXT: v_mov_b32_e32 v12, v2 1720; GFX9-O0-NEXT: v_mov_b32_e32 v8, v0 1721; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload 1722; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1723; GFX9-O0-NEXT: 
; implicit-def: $sgpr4 1724; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 1725; GFX9-O0-NEXT: v_mov_b32_e32 v9, v1 1726; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1727; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1728; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec 1729; GFX9-O0-NEXT: v_mov_b32_e32 v13, v3 1730; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1731; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1732; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 1733; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5 1734; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1735; GFX9-O0-NEXT: ; implicit-def: $sgpr4 1736; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec 1737; GFX9-O0-NEXT: v_mov_b32_e32 v11, v7 1738; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 1739; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 1740; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 1741; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 1742; GFX9-O0-NEXT: v_mov_b32_e32 v2, v12 1743; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13 1744; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill 1745; GFX9-O0-NEXT: s_nop 0 1746; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill 1747; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8 1748; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9 1749; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill 1750; GFX9-O0-NEXT: s_nop 0 1751; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill 1752; GFX9-O0-NEXT: v_mov_b32_e32 v2, v10 1753; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11 1754; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill 1755; GFX9-O0-NEXT: s_nop 0 1756; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill 1757; GFX9-O0-NEXT: s_waitcnt vmcnt(6) 1758; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 1759; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 1760; 
GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill 1761; GFX9-O0-NEXT: s_nop 0 1762; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill 1763; GFX9-O0-NEXT: v_mov_b32_e32 v2, v10 1764; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11 1765; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill 1766; GFX9-O0-NEXT: s_nop 0 1767; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill 1768; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 1769; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 1770; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill 1771; GFX9-O0-NEXT: s_nop 0 1772; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill 1773; GFX9-O0-NEXT: v_mov_b32_e32 v2, v12 1774; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13 1775; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill 1776; GFX9-O0-NEXT: s_nop 0 1777; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill 1778; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8 1779; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9 1780; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill 1781; GFX9-O0-NEXT: s_nop 0 1782; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill 1783; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11 1784; GFX9-O0-NEXT: v_mov_b32_e32 v7, v1 1785; GFX9-O0-NEXT: v_or_b32_e64 v2, v7, v6 1786; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 1787; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0 1788; GFX9-O0-NEXT: v_or_b32_e64 v0, v4, v5 1789; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 1790; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 1791; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 1792; GFX9-O0-NEXT: ; implicit-def: $vgpr30 : SGPR spill to VGPR lane 1793; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 0 1794; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 1 1795; GFX9-O0-NEXT: 
v_cmp_eq_u64_e64 s[4:5], v[0:1], s[6:7] 1796; GFX9-O0-NEXT: v_mov_b32_e32 v1, v13 1797; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9 1798; GFX9-O0-NEXT: v_or_b32_e64 v14, v3, v1 1799; GFX9-O0-NEXT: v_mov_b32_e32 v0, v12 1800; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8 1801; GFX9-O0-NEXT: v_or_b32_e64 v8, v2, v0 1802; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 1803; GFX9-O0-NEXT: v_mov_b32_e32 v9, v14 1804; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7] 1805; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] 1806; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v5 1807; GFX9-O0-NEXT: s_mov_b32 s9, 32 1808; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s9 1809; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v6 1810; GFX9-O0-NEXT: v_min_u32_e64 v5, v5, v6 1811; GFX9-O0-NEXT: s_mov_b32 s8, 0 1812; GFX9-O0-NEXT: ; implicit-def: $sgpr10 1813; GFX9-O0-NEXT: v_mov_b32_e32 v8, s8 1814; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec 1815; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8 1816; GFX9-O0-NEXT: v_mov_b32_e32 v9, v6 1817; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v4 1818; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s9 1819; GFX9-O0-NEXT: v_ffbh_u32_e64 v7, v7 1820; GFX9-O0-NEXT: v_min_u32_e64 v14, v4, v7 1821; GFX9-O0-NEXT: ; implicit-def: $sgpr10 1822; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 1823; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec 1824; GFX9-O0-NEXT: v_mov_b32_e32 v15, v4 1825; GFX9-O0-NEXT: s_mov_b64 s[10:11], 64 1826; GFX9-O0-NEXT: v_mov_b32_e32 v7, v14 1827; GFX9-O0-NEXT: s_mov_b32 s12, s10 1828; GFX9-O0-NEXT: v_mov_b32_e32 v4, v15 1829; GFX9-O0-NEXT: s_mov_b32 s14, s11 1830; GFX9-O0-NEXT: v_add_co_u32_e64 v7, s[12:13], v7, s12 1831; GFX9-O0-NEXT: v_mov_b32_e32 v8, s14 1832; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[12:13], v4, v8, s[12:13] 1833; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec 1834; GFX9-O0-NEXT: v_mov_b32_e32 v8, v4 1835; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 1836; GFX9-O0-NEXT: s_mov_b64 s[12:13], 
s[6:7] 1837; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[10:11], s[12:13] 1838; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v9, s[12:13] 1839; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 1840; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 1841; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v5, v6, s[12:13] 1842; GFX9-O0-NEXT: ; implicit-def: $sgpr12 1843; GFX9-O0-NEXT: ; implicit-def: $sgpr12 1844; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 1845; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4 1846; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v0 1847; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s9 1848; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v1 1849; GFX9-O0-NEXT: v_min_u32_e64 v5, v4, v5 1850; GFX9-O0-NEXT: ; implicit-def: $sgpr12 1851; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 1852; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec 1853; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 1854; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 1855; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v2 1856; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s9 1857; GFX9-O0-NEXT: v_ffbh_u32_e64 v10, v3 1858; GFX9-O0-NEXT: v_min_u32_e64 v14, v4, v10 1859; GFX9-O0-NEXT: ; implicit-def: $sgpr9 1860; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 1861; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec 1862; GFX9-O0-NEXT: v_mov_b32_e32 v15, v4 1863; GFX9-O0-NEXT: v_mov_b32_e32 v10, v14 1864; GFX9-O0-NEXT: s_mov_b32 s8, s10 1865; GFX9-O0-NEXT: v_mov_b32_e32 v4, v15 1866; GFX9-O0-NEXT: s_mov_b32 s10, s11 1867; GFX9-O0-NEXT: v_add_co_u32_e64 v10, s[8:9], v10, s8 1868; GFX9-O0-NEXT: v_mov_b32_e32 v11, s10 1869; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[8:9], v4, v11, s[8:9] 1870; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec 1871; GFX9-O0-NEXT: v_mov_b32_e32 v11, v4 1872; GFX9-O0-NEXT: v_mov_b32_e32 v4, v11 1873; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] 1874; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[12:13], s[8:9] 1875; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[8:9] 1876; GFX9-O0-NEXT: v_mov_b32_e32 
v6, v5 1877; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 1878; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[8:9] 1879; GFX9-O0-NEXT: ; implicit-def: $sgpr8 1880; GFX9-O0-NEXT: ; implicit-def: $sgpr8 1881; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec 1882; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 1883; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 1884; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 1885; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec 1886; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 1887; GFX9-O0-NEXT: s_mov_b32 s10, s6 1888; GFX9-O0-NEXT: s_mov_b32 s11, s7 1889; GFX9-O0-NEXT: v_sub_co_u32_e32 v4, vcc, v4, v7 1890; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v5, v6, vcc 1891; GFX9-O0-NEXT: v_mov_b32_e32 v6, s10 1892; GFX9-O0-NEXT: v_mov_b32_e32 v5, s10 1893; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v6, vcc 1894; GFX9-O0-NEXT: v_mov_b32_e32 v6, s11 1895; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11 1896; GFX9-O0-NEXT: v_subb_co_u32_e32 v6, vcc, v5, v6, vcc 1897; GFX9-O0-NEXT: ; implicit-def: $sgpr8 1898; GFX9-O0-NEXT: ; implicit-def: $sgpr8 1899; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 1900; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8 1901; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill 1902; GFX9-O0-NEXT: s_nop 0 1903; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill 1904; GFX9-O0-NEXT: ; implicit-def: $sgpr8 1905; GFX9-O0-NEXT: ; implicit-def: $sgpr8 1906; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec 1907; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6 1908; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill 1909; GFX9-O0-NEXT: s_nop 0 1910; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill 1911; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7] 1912; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f 1913; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], 
v[4:5], s[12:13] 1914; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15] 1915; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7] 1916; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15] 1917; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9] 1918; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6 1919; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v6, 1 1920; GFX9-O0-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9] 1921; GFX9-O0-NEXT: s_mov_b64 s[4:5], -1 1922; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5] 1923; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 1924; GFX9-O0-NEXT: s_mov_b32 s14, s13 1925; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14 1926; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13 1927; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12 1928; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 1929; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6 1930; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 1931; GFX9-O0-NEXT: v_mov_b32_e32 v9, v8 1932; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v9 1933; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec 1934; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 1935; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 1936; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 1937; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6 1938; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7] 1939; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11 1940; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[8:9] 1941; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10 1942; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[8:9] 1943; GFX9-O0-NEXT: ; implicit-def: $sgpr12 1944; GFX9-O0-NEXT: ; implicit-def: $sgpr12 1945; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 1946; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4 1947; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11 1948; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[8:9] 1949; GFX9-O0-NEXT: v_mov_b32_e32 v3, s10 1950; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[8:9] 1951; GFX9-O0-NEXT: ; implicit-def: $sgpr8 
1952; GFX9-O0-NEXT: ; implicit-def: $sgpr8 1953; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 1954; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4 1955; GFX9-O0-NEXT: s_and_b64 s[6:7], s[4:5], s[6:7] 1956; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill 1957; GFX9-O0-NEXT: s_nop 0 1958; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill 1959; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill 1960; GFX9-O0-NEXT: s_nop 0 1961; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill 1962; GFX9-O0-NEXT: s_mov_b64 s[4:5], exec 1963; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 2 1964; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 3 1965; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 1966; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill 1967; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] 1968; GFX9-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] 1969; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] 1970; GFX9-O0-NEXT: s_cbranch_execz .LBB1_3 1971; GFX9-O0-NEXT: s_branch .LBB1_8 1972; GFX9-O0-NEXT: .LBB1_1: ; %Flow 1973; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 1974; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload 1975; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] 1976; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 1977; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 4 1978; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 5 1979; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] 1980; GFX9-O0-NEXT: ; %bb.2: ; %Flow 1981; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload 1982; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload 1983; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload 1984; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload 1985; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 
offset:152 ; 4-byte Folded Reload 1986; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload 1987; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload 1988; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload 1989; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 1990; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill 1991; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 1992; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill 1993; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 1994; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill 1995; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 1996; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill 1997; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 1998; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill 1999; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 2000; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill 2001; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 2002; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill 2003; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 2004; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill 2005; GFX9-O0-NEXT: s_branch .LBB1_5 2006; GFX9-O0-NEXT: .LBB1_3: ; %Flow2 2007; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 2008; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload 2009; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] 2010; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 2011; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 2 2012; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 3 2013; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] 2014; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload 2015; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload 2016; 
GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload 2017; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload 2018; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 2019; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill 2020; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 2021; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill 2022; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill 2023; GFX9-O0-NEXT: s_nop 0 2024; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill 2025; GFX9-O0-NEXT: s_branch .LBB1_9 2026; GFX9-O0-NEXT: .LBB1_4: ; %udiv-loop-exit 2027; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload 2028; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload 2029; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload 2030; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload 2031; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload 2032; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload 2033; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload 2034; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload 2035; GFX9-O0-NEXT: s_mov_b32 s4, 1 2036; GFX9-O0-NEXT: s_waitcnt vmcnt(2) 2037; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s4, v[0:1] 2038; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 2039; GFX9-O0-NEXT: v_lshlrev_b64 v[9:10], s4, v[9:10] 2040; GFX9-O0-NEXT: s_mov_b32 s4, 63 2041; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] 2042; GFX9-O0-NEXT: v_mov_b32_e32 v11, v1 2043; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10 2044; GFX9-O0-NEXT: v_mov_b32_e32 v12, v8 2045; GFX9-O0-NEXT: v_or3_b32 v4, v4, v11, v12 2046; GFX9-O0-NEXT: 
v_mov_b32_e32 v1, v0 2047; GFX9-O0-NEXT: v_mov_b32_e32 v0, v9 2048; GFX9-O0-NEXT: v_or3_b32 v0, v0, v1, v7 2049; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 2050; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4 2051; GFX9-O0-NEXT: v_mov_b32_e32 v7, v3 2052; GFX9-O0-NEXT: v_mov_b32_e32 v4, v6 2053; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v7 2054; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 2055; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 2056; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 2057; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2058; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4 2059; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill 2060; GFX9-O0-NEXT: s_nop 0 2061; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill 2062; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill 2063; GFX9-O0-NEXT: s_nop 0 2064; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill 2065; GFX9-O0-NEXT: s_branch .LBB1_3 2066; GFX9-O0-NEXT: .LBB1_5: ; %Flow1 2067; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 2068; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload 2069; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] 2070; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 2071; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 6 2072; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 7 2073; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] 2074; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload 2075; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload 2076; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload 2077; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload 2078; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload 2079; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:116 ; 
4-byte Folded Reload 2080; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload 2081; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload 2082; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 2083; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill 2084; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 2085; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill 2086; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill 2087; GFX9-O0-NEXT: s_nop 0 2088; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill 2089; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill 2090; GFX9-O0-NEXT: s_nop 0 2091; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill 2092; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill 2093; GFX9-O0-NEXT: s_nop 0 2094; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill 2095; GFX9-O0-NEXT: s_branch .LBB1_4 2096; GFX9-O0-NEXT: .LBB1_6: ; %udiv-do-while 2097; GFX9-O0-NEXT: ; =>This Inner Loop Header: Depth=1 2098; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 2099; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload 2100; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] 2101; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 2102; GFX9-O0-NEXT: v_readlane_b32 s6, v30, 8 2103; GFX9-O0-NEXT: v_readlane_b32 s7, v30, 9 2104; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload 2105; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload 2106; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload 2107; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload 2108; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 
offset:232 ; 4-byte Folded Reload 2109; GFX9-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload 2110; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload 2111; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload 2112; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload 2113; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload 2114; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload 2115; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload 2116; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload 2117; GFX9-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload 2118; GFX9-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload 2119; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload 2120; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload 2121; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload 2122; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload 2123; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload 2124; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload 2125; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload 2126; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload 2127; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload 2128; GFX9-O0-NEXT: s_mov_b32 s4, 63 2129; GFX9-O0-NEXT: s_waitcnt vmcnt(16) 2130; GFX9-O0-NEXT: v_lshrrev_b64 v[28:29], s4, v[2:3] 2131; GFX9-O0-NEXT: v_mov_b32_e32 v5, v29 2132; 
GFX9-O0-NEXT: s_mov_b32 s5, 1 2133; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], s5, v[22:23] 2134; GFX9-O0-NEXT: v_mov_b32_e32 v4, v23 2135; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 2136; GFX9-O0-NEXT: v_mov_b32_e32 v10, v28 2137; GFX9-O0-NEXT: v_mov_b32_e32 v5, v22 2138; GFX9-O0-NEXT: v_or_b32_e64 v22, v5, v10 2139; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec 2140; GFX9-O0-NEXT: v_mov_b32_e32 v23, v4 2141; GFX9-O0-NEXT: v_lshlrev_b64 v[28:29], s5, v[2:3] 2142; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], s4, v[6:7] 2143; GFX9-O0-NEXT: v_mov_b32_e32 v2, v29 2144; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5 2145; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 2146; GFX9-O0-NEXT: v_mov_b32_e32 v3, v28 2147; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec 2148; GFX9-O0-NEXT: v_or_b32_e64 v4, v3, v4 2149; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 2150; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2 2151; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s5, v[0:1] 2152; GFX9-O0-NEXT: v_lshlrev_b64 v[28:29], s5, v[6:7] 2153; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] 2154; GFX9-O0-NEXT: v_mov_b32_e32 v7, v1 2155; GFX9-O0-NEXT: v_mov_b32_e32 v6, v29 2156; GFX9-O0-NEXT: s_waitcnt vmcnt(10) 2157; GFX9-O0-NEXT: v_mov_b32_e32 v10, v27 2158; GFX9-O0-NEXT: v_or3_b32 v6, v6, v7, v10 2159; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 2160; GFX9-O0-NEXT: v_mov_b32_e32 v0, v28 2161; GFX9-O0-NEXT: v_mov_b32_e32 v7, v26 2162; GFX9-O0-NEXT: v_or3_b32 v0, v0, v1, v7 2163; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 2164; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6 2165; GFX9-O0-NEXT: v_mov_b32_e32 v7, v3 2166; GFX9-O0-NEXT: s_waitcnt vmcnt(8) 2167; GFX9-O0-NEXT: v_mov_b32_e32 v6, v25 2168; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7 2169; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 2170; GFX9-O0-NEXT: v_mov_b32_e32 v2, v24 2171; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 2172; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 
killed $exec 2173; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6 2174; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 2175; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5 2176; GFX9-O0-NEXT: v_mov_b32_e32 v4, v22 2177; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 2178; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 2179; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11 2180; GFX9-O0-NEXT: v_mov_b32_e32 v11, v14 2181; GFX9-O0-NEXT: v_mov_b32_e32 v7, v15 2182; GFX9-O0-NEXT: v_sub_co_u32_e32 v13, vcc, v13, v6 2183; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 2184; GFX9-O0-NEXT: v_subb_co_u32_e32 v12, vcc, v12, v10, vcc 2185; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v11, v4, vcc 2186; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v5, vcc 2187; GFX9-O0-NEXT: ; implicit-def: $sgpr5 2188; GFX9-O0-NEXT: ; implicit-def: $sgpr5 2189; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec 2190; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7 2191; GFX9-O0-NEXT: v_ashrrev_i64 v[13:14], s4, v[11:12] 2192; GFX9-O0-NEXT: v_mov_b32_e32 v7, v14 2193; GFX9-O0-NEXT: s_mov_b64 s[4:5], 1 2194; GFX9-O0-NEXT: s_mov_b32 s8, s5 2195; GFX9-O0-NEXT: v_and_b32_e64 v12, v7, s8 2196; GFX9-O0-NEXT: v_mov_b32_e32 v11, v13 2197; GFX9-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 2198; GFX9-O0-NEXT: v_and_b32_e64 v14, v11, s4 2199; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec 2200; GFX9-O0-NEXT: v_mov_b32_e32 v15, v12 2201; GFX9-O0-NEXT: v_mov_b32_e32 v12, 0 2202; GFX9-O0-NEXT: v_mov_b32_e32 v13, 0 2203; GFX9-O0-NEXT: v_mov_b32_e32 v22, v21 2204; GFX9-O0-NEXT: v_and_b32_e64 v22, v7, v22 2205; GFX9-O0-NEXT: v_and_b32_e64 v20, v11, v20 2206; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec 2207; GFX9-O0-NEXT: v_mov_b32_e32 v21, v22 2208; GFX9-O0-NEXT: v_mov_b32_e32 v22, v19 2209; GFX9-O0-NEXT: v_and_b32_e64 v7, v7, v22 2210; GFX9-O0-NEXT: v_and_b32_e64 v22, v11, v18 2211; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec 2212; GFX9-O0-NEXT: 
v_mov_b32_e32 v23, v7 2213; GFX9-O0-NEXT: v_mov_b32_e32 v19, v22 2214; GFX9-O0-NEXT: v_mov_b32_e32 v18, v23 2215; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20 2216; GFX9-O0-NEXT: v_mov_b32_e32 v7, v21 2217; GFX9-O0-NEXT: v_sub_co_u32_e32 v6, vcc, v6, v19 2218; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v18, vcc 2219; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v4, v11, vcc 2220; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v7, vcc 2221; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2222; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2223; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 2224; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 2225; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2226; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2227; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec 2228; GFX9-O0-NEXT: v_mov_b32_e32 v7, v10 2229; GFX9-O0-NEXT: v_mov_b32_e32 v11, v8 2230; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 killed $vgpr8_vgpr9 killed $exec 2231; GFX9-O0-NEXT: s_mov_b64 s[8:9], -1 2232; GFX9-O0-NEXT: s_mov_b32 s5, s8 2233; GFX9-O0-NEXT: s_mov_b32 s4, s9 2234; GFX9-O0-NEXT: v_mov_b32_e32 v10, v16 2235; GFX9-O0-NEXT: v_mov_b32_e32 v8, v17 2236; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5 2237; GFX9-O0-NEXT: v_add_co_u32_e32 v19, vcc, v11, v16 2238; GFX9-O0-NEXT: v_mov_b32_e32 v11, s4 2239; GFX9-O0-NEXT: v_addc_co_u32_e32 v9, vcc, v9, v11, vcc 2240; GFX9-O0-NEXT: v_mov_b32_e32 v11, s5 2241; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v10, v11, vcc 2242; GFX9-O0-NEXT: v_mov_b32_e32 v10, s4 2243; GFX9-O0-NEXT: v_addc_co_u32_e32 v8, vcc, v8, v10, vcc 2244; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2245; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2246; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec 2247; GFX9-O0-NEXT: v_mov_b32_e32 v20, v9 2248; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2249; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2250; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec 2251; GFX9-O0-NEXT: 
v_mov_b32_e32 v17, v8 2252; GFX9-O0-NEXT: v_mov_b32_e32 v8, v16 2253; GFX9-O0-NEXT: v_mov_b32_e32 v9, v17 2254; GFX9-O0-NEXT: v_mov_b32_e32 v10, v19 2255; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20 2256; GFX9-O0-NEXT: v_mov_b32_e32 v21, v17 2257; GFX9-O0-NEXT: v_mov_b32_e32 v18, v20 2258; GFX9-O0-NEXT: v_or_b32_e64 v18, v18, v21 2259; GFX9-O0-NEXT: v_mov_b32_e32 v17, v16 2260; GFX9-O0-NEXT: v_mov_b32_e32 v16, v19 2261; GFX9-O0-NEXT: v_or_b32_e64 v16, v16, v17 2262; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec 2263; GFX9-O0-NEXT: v_mov_b32_e32 v17, v18 2264; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[16:17], v[12:13] 2265; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] 2266; GFX9-O0-NEXT: v_mov_b32_e32 v17, v3 2267; GFX9-O0-NEXT: v_mov_b32_e32 v16, v2 2268; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill 2269; GFX9-O0-NEXT: s_nop 0 2270; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill 2271; GFX9-O0-NEXT: v_mov_b32_e32 v17, v1 2272; GFX9-O0-NEXT: v_mov_b32_e32 v16, v0 2273; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill 2274; GFX9-O0-NEXT: s_nop 0 2275; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill 2276; GFX9-O0-NEXT: v_mov_b32_e32 v17, v15 2277; GFX9-O0-NEXT: v_mov_b32_e32 v16, v14 2278; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill 2279; GFX9-O0-NEXT: s_nop 0 2280; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill 2281; GFX9-O0-NEXT: v_mov_b32_e32 v17, v13 2282; GFX9-O0-NEXT: v_mov_b32_e32 v16, v12 2283; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill 2284; GFX9-O0-NEXT: s_nop 0 2285; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill 2286; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5] 2287; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 4 
2288; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 5 2289; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5] 2290; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 8 2291; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 9 2292; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 2293; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill 2294; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] 2295; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill 2296; GFX9-O0-NEXT: s_nop 0 2297; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill 2298; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill 2299; GFX9-O0-NEXT: s_nop 0 2300; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill 2301; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill 2302; GFX9-O0-NEXT: s_nop 0 2303; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill 2304; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill 2305; GFX9-O0-NEXT: s_nop 0 2306; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill 2307; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill 2308; GFX9-O0-NEXT: s_nop 0 2309; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill 2310; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill 2311; GFX9-O0-NEXT: s_nop 0 2312; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill 2313; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill 2314; GFX9-O0-NEXT: s_nop 0 2315; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill 2316; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill 2317; GFX9-O0-NEXT: s_nop 0 2318; 
GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill 2319; GFX9-O0-NEXT: s_andn2_b64 exec, exec, s[4:5] 2320; GFX9-O0-NEXT: s_cbranch_execnz .LBB1_6 2321; GFX9-O0-NEXT: s_branch .LBB1_1 2322; GFX9-O0-NEXT: .LBB1_7: ; %udiv-preheader 2323; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 2324; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload 2325; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] 2326; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload 2327; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload 2328; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload 2329; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload 2330; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload 2331; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload 2332; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload 2333; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload 2334; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload 2335; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload 2336; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload 2337; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload 2338; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload 2339; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload 2340; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload 2341; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload 2342; GFX9-O0-NEXT: s_waitcnt vmcnt(9) 2343; GFX9-O0-NEXT: 
v_mov_b32_e32 v4, v10 2344; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 2345; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], v4, v[20:21] 2346; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 2347; GFX9-O0-NEXT: s_mov_b32 s6, 64 2348; GFX9-O0-NEXT: v_sub_u32_e64 v12, s6, v4 2349; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], v12, v[18:19] 2350; GFX9-O0-NEXT: v_mov_b32_e32 v12, v23 2351; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v12 2352; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec 2353; GFX9-O0-NEXT: v_mov_b32_e32 v7, v22 2354; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7 2355; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec 2356; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 2357; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7 2358; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v4, s6 2359; GFX9-O0-NEXT: v_sub_u32_e64 v5, v4, s6 2360; GFX9-O0-NEXT: v_lshrrev_b64 v[22:23], v5, v[18:19] 2361; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 2362; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[4:5] 2363; GFX9-O0-NEXT: s_mov_b32 s6, 0 2364; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v4, s6 2365; GFX9-O0-NEXT: v_mov_b32_e32 v12, v21 2366; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[6:7] 2367; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 2368; GFX9-O0-NEXT: v_mov_b32_e32 v6, v22 2369; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5] 2370; GFX9-O0-NEXT: v_mov_b32_e32 v7, v20 2371; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[6:7] 2372; GFX9-O0-NEXT: ; implicit-def: $sgpr6 2373; GFX9-O0-NEXT: ; implicit-def: $sgpr6 2374; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec 2375; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 2376; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], v4, v[18:19] 2377; GFX9-O0-NEXT: v_mov_b32_e32 v15, v5 2378; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 2379; GFX9-O0-NEXT: s_mov_b32 s8, s7 2380; GFX9-O0-NEXT: v_mov_b32_e32 v12, s8 2381; GFX9-O0-NEXT: v_cndmask_b32_e64 v12, v12, v15, s[4:5] 2382; GFX9-O0-NEXT: v_mov_b32_e32 v5, v4 2383; GFX9-O0-NEXT: s_mov_b32 s8, s6 2384; GFX9-O0-NEXT: 
v_mov_b32_e32 v4, s8 2385; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5] 2386; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2387; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2388; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 2389; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12 2390; GFX9-O0-NEXT: v_mov_b32_e32 v12, v13 2391; GFX9-O0-NEXT: v_mov_b32_e32 v15, v14 2392; GFX9-O0-NEXT: s_mov_b64 s[8:9], -1 2393; GFX9-O0-NEXT: s_mov_b32 s5, s8 2394; GFX9-O0-NEXT: s_mov_b32 s4, s9 2395; GFX9-O0-NEXT: v_mov_b32_e32 v14, v16 2396; GFX9-O0-NEXT: v_mov_b32_e32 v13, v17 2397; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5 2398; GFX9-O0-NEXT: v_add_co_u32_e32 v12, vcc, v12, v16 2399; GFX9-O0-NEXT: v_mov_b32_e32 v16, s4 2400; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v15, v16, vcc 2401; GFX9-O0-NEXT: v_mov_b32_e32 v15, s5 2402; GFX9-O0-NEXT: v_addc_co_u32_e32 v14, vcc, v14, v15, vcc 2403; GFX9-O0-NEXT: v_mov_b32_e32 v15, s4 2404; GFX9-O0-NEXT: v_addc_co_u32_e32 v13, vcc, v13, v15, vcc 2405; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2406; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2407; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec 2408; GFX9-O0-NEXT: v_mov_b32_e32 v15, v13 2409; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2410; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2411; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec 2412; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16 2413; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] 2414; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill 2415; GFX9-O0-NEXT: s_nop 0 2416; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill 2417; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill 2418; GFX9-O0-NEXT: s_nop 0 2419; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill 2420; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[6:7] 2421; GFX9-O0-NEXT: v_mov_b32_e32 v15, s9 2422; 
GFX9-O0-NEXT: v_mov_b32_e32 v14, s8 2423; GFX9-O0-NEXT: v_mov_b32_e32 v13, s7 2424; GFX9-O0-NEXT: v_mov_b32_e32 v12, s6 2425; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 8 2426; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 9 2427; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 2428; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill 2429; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] 2430; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill 2431; GFX9-O0-NEXT: s_nop 0 2432; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill 2433; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill 2434; GFX9-O0-NEXT: s_nop 0 2435; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill 2436; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill 2437; GFX9-O0-NEXT: s_nop 0 2438; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill 2439; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill 2440; GFX9-O0-NEXT: s_nop 0 2441; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill 2442; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill 2443; GFX9-O0-NEXT: s_nop 0 2444; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill 2445; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill 2446; GFX9-O0-NEXT: s_nop 0 2447; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill 2448; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill 2449; GFX9-O0-NEXT: s_nop 0 2450; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill 2451; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill 2452; 
GFX9-O0-NEXT: s_nop 0 2453; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill 2454; GFX9-O0-NEXT: s_branch .LBB1_6 2455; GFX9-O0-NEXT: .LBB1_8: ; %udiv-bb1 2456; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 2457; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload 2458; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] 2459; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload 2460; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload 2461; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload 2462; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload 2463; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload 2464; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload 2465; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload 2466; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload 2467; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1 2468; GFX9-O0-NEXT: s_mov_b32 s5, s6 2469; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 2470; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 2471; GFX9-O0-NEXT: s_mov_b32 s4, s7 2472; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 2473; GFX9-O0-NEXT: s_mov_b32 s8, s6 2474; GFX9-O0-NEXT: s_mov_b32 s9, s7 2475; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4 2476; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 2477; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5 2478; GFX9-O0-NEXT: v_add_co_u32_e32 v8, vcc, v3, v4 2479; GFX9-O0-NEXT: v_mov_b32_e32 v4, s4 2480; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 2481; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v4, vcc 2482; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 2483; GFX9-O0-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v4, vcc 2484; GFX9-O0-NEXT: v_mov_b32_e32 v4, s9 2485; GFX9-O0-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v4, vcc 2486; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2487; GFX9-O0-NEXT: ; 
implicit-def: $sgpr4 2488; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec 2489; GFX9-O0-NEXT: v_mov_b32_e32 v9, v1 2490; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2491; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2492; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 2493; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 2494; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1 2495; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0 2496; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill 2497; GFX9-O0-NEXT: s_nop 0 2498; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill 2499; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 2500; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 2501; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill 2502; GFX9-O0-NEXT: s_nop 0 2503; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill 2504; GFX9-O0-NEXT: s_mov_b32 s4, 0x7f 2505; GFX9-O0-NEXT: v_sub_u32_e64 v2, s4, v3 2506; GFX9-O0-NEXT: v_lshlrev_b64 v[4:5], v2, v[10:11] 2507; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5 2508; GFX9-O0-NEXT: s_mov_b32 s4, 64 2509; GFX9-O0-NEXT: v_sub_u32_e64 v13, s4, v2 2510; GFX9-O0-NEXT: v_lshrrev_b64 v[13:14], v13, v[6:7] 2511; GFX9-O0-NEXT: v_mov_b32_e32 v15, v14 2512; GFX9-O0-NEXT: v_or_b32_e64 v12, v12, v15 2513; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec 2514; GFX9-O0-NEXT: v_mov_b32_e32 v5, v13 2515; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 2516; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 2517; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12 2518; GFX9-O0-NEXT: v_mov_b32_e32 v14, v5 2519; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v2, s4 2520; GFX9-O0-NEXT: s_mov_b32 s10, 63 2521; GFX9-O0-NEXT: v_sub_u32_e64 v3, s10, v3 2522; GFX9-O0-NEXT: v_lshlrev_b64 v[12:13], v3, v[6:7] 2523; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13 2524; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[4:5] 2525; 
GFX9-O0-NEXT: s_mov_b32 s10, 0 2526; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[10:11], v2, s10 2527; GFX9-O0-NEXT: v_mov_b32_e32 v14, v11 2528; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[10:11] 2529; GFX9-O0-NEXT: v_mov_b32_e32 v5, v4 2530; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12 2531; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5] 2532; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 2533; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[10:11] 2534; GFX9-O0-NEXT: ; implicit-def: $sgpr10 2535; GFX9-O0-NEXT: ; implicit-def: $sgpr10 2536; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 2537; GFX9-O0-NEXT: v_mov_b32_e32 v5, v3 2538; GFX9-O0-NEXT: v_lshlrev_b64 v[6:7], v2, v[6:7] 2539; GFX9-O0-NEXT: v_mov_b32_e32 v3, v7 2540; GFX9-O0-NEXT: v_mov_b32_e32 v2, s9 2541; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5] 2542; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec 2543; GFX9-O0-NEXT: v_mov_b32_e32 v3, s8 2544; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v3, v6, s[4:5] 2545; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2546; GFX9-O0-NEXT: ; implicit-def: $sgpr4 2547; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec 2548; GFX9-O0-NEXT: v_mov_b32_e32 v7, v2 2549; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill 2550; GFX9-O0-NEXT: s_nop 0 2551; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill 2552; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill 2553; GFX9-O0-NEXT: s_nop 0 2554; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill 2555; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 2556; GFX9-O0-NEXT: v_mov_b32_e32 v2, v9 2557; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 2558; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 2559; GFX9-O0-NEXT: v_mov_b32_e32 v0, v8 2560; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v1 2561; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec 2562; 
GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 2563; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[0:1], s[6:7] 2564; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] 2565; GFX9-O0-NEXT: v_mov_b32_e32 v2, s8 2566; GFX9-O0-NEXT: v_mov_b32_e32 v3, s9 2567; GFX9-O0-NEXT: v_mov_b32_e32 v0, s6 2568; GFX9-O0-NEXT: v_mov_b32_e32 v1, s7 2569; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill 2570; GFX9-O0-NEXT: s_nop 0 2571; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill 2572; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill 2573; GFX9-O0-NEXT: s_nop 0 2574; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill 2575; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill 2576; GFX9-O0-NEXT: s_nop 0 2577; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill 2578; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill 2579; GFX9-O0-NEXT: s_nop 0 2580; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill 2581; GFX9-O0-NEXT: s_mov_b64 s[6:7], exec 2582; GFX9-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] 2583; GFX9-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] 2584; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 6 2585; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 7 2586; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 2587; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill 2588; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] 2589; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] 2590; GFX9-O0-NEXT: s_cbranch_execz .LBB1_5 2591; GFX9-O0-NEXT: s_branch .LBB1_7 2592; GFX9-O0-NEXT: .LBB1_9: ; %udiv-end 2593; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload 2594; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload 2595; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:84 
; 4-byte Folded Reload 2596; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload 2597; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload 2598; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload 2599; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload 2600; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload 2601; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload 2602; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload 2603; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload 2604; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload 2605; GFX9-O0-NEXT: s_mov_b32 s4, 32 2606; GFX9-O0-NEXT: s_waitcnt vmcnt(2) 2607; GFX9-O0-NEXT: v_lshrrev_b64 v[2:3], s4, v[6:7] 2608; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2 2609; GFX9-O0-NEXT: s_waitcnt vmcnt(1) 2610; GFX9-O0-NEXT: v_mov_b32_e32 v2, v12 2611; GFX9-O0-NEXT: v_mul_lo_u32 v4, v5, v2 2612; GFX9-O0-NEXT: s_waitcnt vmcnt(0) 2613; GFX9-O0-NEXT: v_lshrrev_b64 v[12:13], s4, v[12:13] 2614; GFX9-O0-NEXT: v_mov_b32_e32 v3, v12 2615; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec 2616; GFX9-O0-NEXT: v_mul_lo_u32 v3, v6, v3 2617; GFX9-O0-NEXT: v_mad_u64_u32 v[12:13], s[6:7], v6, v2, 0 2618; GFX9-O0-NEXT: v_mov_b32_e32 v2, v13 2619; GFX9-O0-NEXT: v_add3_u32 v2, v2, v3, v4 2620; GFX9-O0-NEXT: ; implicit-def: $sgpr5 2621; GFX9-O0-NEXT: ; implicit-def: $sgpr6 2622; GFX9-O0-NEXT: ; implicit-def: $sgpr6 2623; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5 2624; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2625; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4 2626; GFX9-O0-NEXT: v_lshlrev_b64 v[3:4], s4, v[2:3] 2627; GFX9-O0-NEXT: v_mov_b32_e32 v7, v4 2628; GFX9-O0-NEXT: ; kill: def 
$vgpr12 killed $vgpr12 killed $vgpr12_vgpr13 killed $exec 2629; GFX9-O0-NEXT: s_mov_b32 s5, 0 2630; GFX9-O0-NEXT: ; implicit-def: $sgpr6 2631; GFX9-O0-NEXT: v_mov_b32_e32 v2, s5 2632; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec 2633; GFX9-O0-NEXT: v_mov_b32_e32 v13, v2 2634; GFX9-O0-NEXT: v_mov_b32_e32 v2, v13 2635; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v7 2636; GFX9-O0-NEXT: v_mov_b32_e32 v4, v3 2637; GFX9-O0-NEXT: v_mov_b32_e32 v3, v12 2638; GFX9-O0-NEXT: v_or_b32_e64 v12, v3, v4 2639; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec 2640; GFX9-O0-NEXT: v_mov_b32_e32 v13, v2 2641; GFX9-O0-NEXT: v_lshrrev_b64 v[2:3], s4, v[14:15] 2642; GFX9-O0-NEXT: v_mov_b32_e32 v7, v2 2643; GFX9-O0-NEXT: v_mov_b32_e32 v2, v10 2644; GFX9-O0-NEXT: v_mul_lo_u32 v3, v2, v7 2645; GFX9-O0-NEXT: v_lshrrev_b64 v[10:11], s4, v[10:11] 2646; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 killed $vgpr10_vgpr11 killed $exec 2647; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14 2648; GFX9-O0-NEXT: v_mul_lo_u32 v10, v10, v4 2649; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v2, v4, 0 2650; GFX9-O0-NEXT: v_mov_b32_e32 v2, v15 2651; GFX9-O0-NEXT: v_add3_u32 v2, v2, v3, v10 2652; GFX9-O0-NEXT: ; implicit-def: $sgpr6 2653; GFX9-O0-NEXT: ; implicit-def: $sgpr7 2654; GFX9-O0-NEXT: ; implicit-def: $sgpr7 2655; GFX9-O0-NEXT: v_mov_b32_e32 v10, s6 2656; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2657; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10 2658; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s4, v[2:3] 2659; GFX9-O0-NEXT: v_mov_b32_e32 v11, v3 2660; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec 2661; GFX9-O0-NEXT: ; implicit-def: $sgpr6 2662; GFX9-O0-NEXT: v_mov_b32_e32 v10, s5 2663; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec 2664; GFX9-O0-NEXT: v_mov_b32_e32 v15, v10 2665; GFX9-O0-NEXT: v_mov_b32_e32 v10, v15 2666; GFX9-O0-NEXT: v_or_b32_e64 v10, v10, 
v11 2667; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 2668; GFX9-O0-NEXT: v_mov_b32_e32 v2, v14 2669; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 2670; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2671; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10 2672; GFX9-O0-NEXT: v_mov_b32_e32 v10, v2 2673; GFX9-O0-NEXT: v_mov_b32_e32 v11, v12 2674; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 2675; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13 2676; GFX9-O0-NEXT: v_add_co_u32_e64 v12, s[6:7], v10, v11 2677; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v2, v3, s[6:7] 2678; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec 2679; GFX9-O0-NEXT: v_mov_b32_e32 v13, v2 2680; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v7, v5, 0 2681; GFX9-O0-NEXT: v_mov_b32_e32 v2, v14 2682; GFX9-O0-NEXT: ; implicit-def: $sgpr6 2683; GFX9-O0-NEXT: v_mov_b32_e32 v10, s5 2684; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2685; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10 2686; GFX9-O0-NEXT: v_mov_b32_e32 v10, v3 2687; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15 2688; GFX9-O0-NEXT: ; implicit-def: $sgpr6 2689; GFX9-O0-NEXT: ; implicit-def: $sgpr7 2690; GFX9-O0-NEXT: ; implicit-def: $sgpr7 2691; GFX9-O0-NEXT: v_mov_b32_e32 v11, s6 2692; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec 2693; GFX9-O0-NEXT: v_mov_b32_e32 v15, v11 2694; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15] 2695; GFX9-O0-NEXT: v_mov_b32_e32 v11, v15 2696; GFX9-O0-NEXT: v_or_b32_e64 v10, v10, v11 2697; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $vgpr2_vgpr3 killed $exec 2698; GFX9-O0-NEXT: v_mov_b32_e32 v3, v14 2699; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 2700; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2701; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10 2702; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v7, v6, 0 2703; GFX9-O0-NEXT: v_mov_b32_e32 v10, v14 2704; GFX9-O0-NEXT: ; implicit-def: $sgpr6 2705; GFX9-O0-NEXT: 
v_mov_b32_e32 v7, s5 2706; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec 2707; GFX9-O0-NEXT: v_mov_b32_e32 v11, v7 2708; GFX9-O0-NEXT: v_mov_b32_e32 v7, v11 2709; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15 2710; GFX9-O0-NEXT: ; implicit-def: $sgpr6 2711; GFX9-O0-NEXT: ; implicit-def: $sgpr7 2712; GFX9-O0-NEXT: ; implicit-def: $sgpr7 2713; GFX9-O0-NEXT: v_mov_b32_e32 v16, s6 2714; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec 2715; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16 2716; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15] 2717; GFX9-O0-NEXT: v_mov_b32_e32 v16, v15 2718; GFX9-O0-NEXT: v_or_b32_e64 v7, v7, v16 2719; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 killed $vgpr10_vgpr11 killed $exec 2720; GFX9-O0-NEXT: v_mov_b32_e32 v11, v14 2721; GFX9-O0-NEXT: v_or_b32_e64 v18, v10, v11 2722; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec 2723; GFX9-O0-NEXT: v_mov_b32_e32 v19, v7 2724; GFX9-O0-NEXT: v_mad_u64_u32 v[10:11], s[6:7], v4, v6, 0 2725; GFX9-O0-NEXT: v_mov_b32_e32 v16, v11 2726; GFX9-O0-NEXT: ; implicit-def: $sgpr6 2727; GFX9-O0-NEXT: v_mov_b32_e32 v6, s5 2728; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec 2729; GFX9-O0-NEXT: v_mov_b32_e32 v17, v6 2730; GFX9-O0-NEXT: v_mov_b32_e32 v6, v18 2731; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16 2732; GFX9-O0-NEXT: v_mov_b32_e32 v7, v19 2733; GFX9-O0-NEXT: v_mov_b32_e32 v14, v17 2734; GFX9-O0-NEXT: v_add_co_u32_e64 v6, s[6:7], v6, v15 2735; GFX9-O0-NEXT: v_addc_co_u32_e64 v14, s[6:7], v7, v14, s[6:7] 2736; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec 2737; GFX9-O0-NEXT: v_mov_b32_e32 v7, v14 2738; GFX9-O0-NEXT: v_mov_b32_e32 v14, v7 2739; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0xffffffff 2740; GFX9-O0-NEXT: s_mov_b32 s8, s7 2741; GFX9-O0-NEXT: v_and_b32_e64 v14, v14, s8 2742; GFX9-O0-NEXT: v_mov_b32_e32 v15, v6 2743; GFX9-O0-NEXT: ; kill: def $sgpr6 killed $sgpr6 
killed $sgpr6_sgpr7 2744; GFX9-O0-NEXT: v_and_b32_e64 v16, v15, s6 2745; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec 2746; GFX9-O0-NEXT: v_mov_b32_e32 v17, v14 2747; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v4, v5, 0 2748; GFX9-O0-NEXT: v_mov_b32_e32 v18, v14 2749; GFX9-O0-NEXT: ; implicit-def: $sgpr6 2750; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5 2751; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec 2752; GFX9-O0-NEXT: v_mov_b32_e32 v19, v4 2753; GFX9-O0-NEXT: v_mov_b32_e32 v4, v19 2754; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15 2755; GFX9-O0-NEXT: ; implicit-def: $sgpr6 2756; GFX9-O0-NEXT: ; implicit-def: $sgpr7 2757; GFX9-O0-NEXT: ; implicit-def: $sgpr7 2758; GFX9-O0-NEXT: v_mov_b32_e32 v5, s6 2759; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec 2760; GFX9-O0-NEXT: v_mov_b32_e32 v15, v5 2761; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15] 2762; GFX9-O0-NEXT: v_mov_b32_e32 v5, v15 2763; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 2764; GFX9-O0-NEXT: v_mov_b32_e32 v5, v18 2765; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec 2766; GFX9-O0-NEXT: v_or_b32_e64 v18, v5, v14 2767; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec 2768; GFX9-O0-NEXT: v_mov_b32_e32 v19, v4 2769; GFX9-O0-NEXT: v_mov_b32_e32 v4, v18 2770; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16 2771; GFX9-O0-NEXT: v_mov_b32_e32 v5, v19 2772; GFX9-O0-NEXT: v_mov_b32_e32 v14, v17 2773; GFX9-O0-NEXT: v_add_co_u32_e64 v4, s[6:7], v4, v15 2774; GFX9-O0-NEXT: v_addc_co_u32_e64 v14, s[6:7], v5, v14, s[6:7] 2775; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec 2776; GFX9-O0-NEXT: v_mov_b32_e32 v5, v14 2777; GFX9-O0-NEXT: v_lshrrev_b64 v[16:17], s4, v[4:5] 2778; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], s4, v[6:7] 2779; GFX9-O0-NEXT: v_mov_b32_e32 v14, v6 2780; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16 2781; GFX9-O0-NEXT: v_mov_b32_e32 v6, 
v7 2782; GFX9-O0-NEXT: v_mov_b32_e32 v7, v17 2783; GFX9-O0-NEXT: v_add_co_u32_e64 v14, s[6:7], v14, v15 2784; GFX9-O0-NEXT: v_addc_co_u32_e64 v6, s[6:7], v6, v7, s[6:7] 2785; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec 2786; GFX9-O0-NEXT: v_mov_b32_e32 v15, v6 2787; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2 2788; GFX9-O0-NEXT: v_mov_b32_e32 v7, v14 2789; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 2790; GFX9-O0-NEXT: v_mov_b32_e32 v3, v15 2791; GFX9-O0-NEXT: v_add_co_u32_e64 v14, s[6:7], v6, v7 2792; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v2, v3, s[6:7] 2793; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec 2794; GFX9-O0-NEXT: v_mov_b32_e32 v15, v2 2795; GFX9-O0-NEXT: v_mov_b32_e32 v2, v14 2796; GFX9-O0-NEXT: v_mov_b32_e32 v7, v12 2797; GFX9-O0-NEXT: v_mov_b32_e32 v3, v15 2798; GFX9-O0-NEXT: v_mov_b32_e32 v6, v13 2799; GFX9-O0-NEXT: v_add_co_u32_e64 v2, s[6:7], v2, v7 2800; GFX9-O0-NEXT: v_addc_co_u32_e64 v6, s[6:7], v3, v6, s[6:7] 2801; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec 2802; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6 2803; GFX9-O0-NEXT: v_lshlrev_b64 v[5:6], s4, v[4:5] 2804; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 2805; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 killed $vgpr10_vgpr11 killed $exec 2806; GFX9-O0-NEXT: ; implicit-def: $sgpr6 2807; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5 2808; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec 2809; GFX9-O0-NEXT: v_mov_b32_e32 v11, v4 2810; GFX9-O0-NEXT: v_mov_b32_e32 v4, v11 2811; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v7 2812; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 2813; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 2814; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v6 2815; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec 2816; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 2817; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2 2818; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 2819; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 2820; 
GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v8
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v9
; GFX9-O0-NEXT: v_sub_co_u32_e32 v5, vcc, v5, v7
; GFX9-O0-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v6, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v4, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v0
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v5
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
; GFX9-O0-NEXT: v_lshrrev_b64 v[5:6], s4, v[5:6]
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5
; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4], s4, v[3:4]
; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec
; GFX9-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: s_setpc_b64 s[30:31]
  %div = urem i128 %lhs, %rhs
  ret i128 %div
}

; Signed remainder of an i128 by the constant 8589934592 (2^33).
; Both the default and the -O0 expectations below are a single straight-line
; block (%bb.0) built from shifts, add-with-carry, a mask and a
; subtract-with-borrow chain; no division loop is expected.
; NOTE(review): the IR result is named %div although it holds the remainder.
define i128 @v_srem_i128_v_pow2k(i128 %lhs) {
; GFX9-LABEL: v_srem_i128_v_pow2k:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v3
; GFX9-NEXT: v_mov_b32_e32 v5, v4
; GFX9-NEXT: v_lshrrev_b64 v[4:5], 31, v[4:5]
; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v0, v4
; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v1, v5, vcc
; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v2, vcc
; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v3, vcc
; GFX9-NEXT: v_and_b32_e32 v4, -2, v4
; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, 0, v0
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v4, vcc
; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v5, vcc
; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v6, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-O0-LABEL: v_srem_i128_v_pow2k:
; GFX9-O0: ; %bb.0:
; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v3
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v7
; GFX9-O0-NEXT: s_mov_b32 s4, 63
; GFX9-O0-NEXT: v_ashrrev_i64 v[6:7], s4, v[6:7]
; GFX9-O0-NEXT: s_mov_b32 s4, 31
; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], s4, v[6:7]
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v6
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v7
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0
; GFX9-O0-NEXT: s_mov_b32 s5, s6
; GFX9-O0-NEXT: s_mov_b32 s4, s7
; GFX9-O0-NEXT: v_add_co_u32_e32 v6, vcc, v5, v4
; GFX9-O0-NEXT: v_addc_co_u32_e32 v4, vcc, v0, v2, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v2, s5
; GFX9-O0-NEXT: v_addc_co_u32_e32 v8, vcc, v3, v2, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v2, s4
; GFX9-O0-NEXT: v_addc_co_u32_e32 v2, vcc, v1, v2, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v4
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v7
; GFX9-O0-NEXT: s_mov_b32 s6, -2
; GFX9-O0-NEXT: s_mov_b32 s4, 0
; GFX9-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
; GFX9-O0-NEXT: s_mov_b32 s5, s6
; GFX9-O0-NEXT: s_mov_b32 s6, s5
; GFX9-O0-NEXT: v_and_b32_e64 v4, v4, s6
; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
; GFX9-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX9-O0-NEXT: v_and_b32_e64 v9, v6, s4
; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v4
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v9
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v9
; GFX9-O0-NEXT: v_sub_co_u32_e32 v5, vcc, v5, v7
; GFX9-O0-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v6, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v4, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v0
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v5
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
; GFX9-O0-NEXT: s_mov_b32 s4, 32
; GFX9-O0-NEXT: v_lshrrev_b64 v[5:6], s4, v[5:6]
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5
; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4], s4, v[3:4]
; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec
; GFX9-O0-NEXT: s_setpc_b64 s[30:31]
  %div = srem i128 %lhs, 8589934592
  ret i128 %div
}

; Unsigned remainder of an i128 by the constant 8589934592 (2^33).
; The default-optimization expectation is a pure mask: v0 is left untouched,
; v1 is ANDed with 1, and v2/v3 are set to 0. The -O0 expectation reaches the
; same result through explicit AND operations with -1 and 1.
; NOTE(review): the IR result is named %div although it holds the remainder.
define i128 @v_urem_i128_v_pow2k(i128 %lhs) {
; GFX9-LABEL: v_urem_i128_v_pow2k:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-O0-LABEL: v_urem_i128_v_pow2k:
; GFX9-O0: ; %bb.0:
; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr1 killed $exec
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9-O0-NEXT: s_mov_b32 s6, 1
; GFX9-O0-NEXT: s_mov_b32 s4, -1
; GFX9-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
; GFX9-O0-NEXT: s_mov_b32 s5, s6
; GFX9-O0-NEXT: s_mov_b32 s6, s5
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1
; GFX9-O0-NEXT: v_and_b32_e64 v3, v2, s6
; GFX9-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
; GFX9-O0-NEXT: v_and_b32_e64 v1, v0, s4
; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
; GFX9-O0-NEXT: s_mov_b32 s4, 32
; GFX9-O0-NEXT: v_lshrrev_b64 v[1:2], s4, v[1:2]
; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr1_vgpr2 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v3, 0
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
; GFX9-O0-NEXT: s_setpc_b64 s[30:31]
  %div = urem i128 %lhs, 8589934592
  ret i128 %div
}

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX9-SDAG: {{.*}}
; GFX9-SDAG-O0: {{.*}}