1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 2; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDAG %s 3; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -o - %s | FileCheck -check-prefix=GISEL %s 4 5define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { 6; SDAG-LABEL: v_sdiv_v2i128_vv: 7; SDAG: ; %bb.0: ; %_udiv-special-cases_udiv-special-cases 8; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9; SDAG-NEXT: v_sub_i32_e32 v16, vcc, 0, v0 10; SDAG-NEXT: v_mov_b32_e32 v18, 0 11; SDAG-NEXT: v_ashrrev_i32_e32 v24, 31, v3 12; SDAG-NEXT: v_ashrrev_i32_e32 v25, 31, v11 13; SDAG-NEXT: s_mov_b64 s[10:11], 0x7f 14; SDAG-NEXT: v_subb_u32_e32 v17, vcc, 0, v1, vcc 15; SDAG-NEXT: v_mov_b32_e32 v26, v24 16; SDAG-NEXT: v_mov_b32_e32 v27, v25 17; SDAG-NEXT: v_subb_u32_e32 v19, vcc, 0, v2, vcc 18; SDAG-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[2:3] 19; SDAG-NEXT: v_cndmask_b32_e64 v21, v1, v17, s[4:5] 20; SDAG-NEXT: v_cndmask_b32_e64 v20, v0, v16, s[4:5] 21; SDAG-NEXT: v_subb_u32_e32 v0, vcc, 0, v3, vcc 22; SDAG-NEXT: v_cndmask_b32_e64 v16, v2, v19, s[4:5] 23; SDAG-NEXT: v_ffbh_u32_e32 v1, v20 24; SDAG-NEXT: v_ffbh_u32_e32 v2, v21 25; SDAG-NEXT: v_cndmask_b32_e64 v17, v3, v0, s[4:5] 26; SDAG-NEXT: v_or_b32_e32 v0, v20, v16 27; SDAG-NEXT: v_sub_i32_e32 v3, vcc, 0, v8 28; SDAG-NEXT: v_add_i32_e64 v19, s[4:5], 32, v1 29; SDAG-NEXT: v_ffbh_u32_e32 v22, v16 30; SDAG-NEXT: v_or_b32_e32 v1, v21, v17 31; SDAG-NEXT: v_subb_u32_e32 v23, vcc, 0, v9, vcc 32; SDAG-NEXT: v_min_u32_e32 v2, v19, v2 33; SDAG-NEXT: v_add_i32_e64 v19, s[4:5], 32, v22 34; SDAG-NEXT: v_ffbh_u32_e32 v22, v17 35; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] 36; SDAG-NEXT: v_cmp_gt_i64_e64 s[6:7], 0, v[10:11] 37; SDAG-NEXT: v_cndmask_b32_e64 v28, v9, v23, s[6:7] 38; SDAG-NEXT: v_subb_u32_e32 v0, vcc, 0, v10, vcc 39; SDAG-NEXT: v_cndmask_b32_e64 v29, v8, v3, s[6:7] 40; SDAG-NEXT: v_min_u32_e32 v1, v19, v22 41; SDAG-NEXT: v_add_i32_e64 v2, s[8:9], 64, v2 42; SDAG-NEXT: v_addc_u32_e64 v3, s[8:9], 0, 0, s[8:9] 43; SDAG-NEXT: v_subb_u32_e32 v8, vcc, 0, v11, vcc 44; SDAG-NEXT: v_cndmask_b32_e64 v0, v10, v0, s[6:7] 45; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[16:17] 46; SDAG-NEXT: v_cndmask_b32_e64 v9, v3, 0, vcc 47; SDAG-NEXT: v_cndmask_b32_e32 v10, v2, v1, vcc 48; SDAG-NEXT: v_ffbh_u32_e32 v3, v29 49; SDAG-NEXT: v_ffbh_u32_e32 v19, v28 50; SDAG-NEXT: v_cndmask_b32_e64 v1, v11, v8, s[6:7] 51; SDAG-NEXT: v_or_b32_e32 v2, v29, v0 52; SDAG-NEXT: v_add_i32_e32 v8, vcc, 32, v3 53; SDAG-NEXT: v_ffbh_u32_e32 v11, v0 54; SDAG-NEXT: v_or_b32_e32 v3, v28, v1 55; SDAG-NEXT: v_min_u32_e32 v8, v8, v19 56; SDAG-NEXT: v_add_i32_e32 v11, vcc, 32, v11 57; SDAG-NEXT: v_ffbh_u32_e32 v19, v1 58; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3] 59; SDAG-NEXT: v_min_u32_e32 v2, v11, v19 60; SDAG-NEXT: v_add_i32_e64 v3, s[6:7], 64, v8 61; SDAG-NEXT: v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7] 62; SDAG-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[0:1] 63; SDAG-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[6:7] 64; SDAG-NEXT: v_cndmask_b32_e64 v2, v3, v2, s[6:7] 65; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[4:5] 66; SDAG-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 67; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v8, v9, vcc 68; SDAG-NEXT: v_xor_b32_e32 v8, 0x7f, v2 69; SDAG-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v18, vcc 70; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[10:11], v[2:3] 71; SDAG-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] 72; SDAG-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v18, vcc 73; SDAG-NEXT: v_or_b32_e32 v8, v8, v10 74; SDAG-NEXT: v_or_b32_e32 v9, v3, v11 75; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] 76; SDAG-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 77; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] 78; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[10:11] 79; SDAG-NEXT: v_cndmask_b32_e64 v8, v18, v19, s[4:5] 80; SDAG-NEXT: v_and_b32_e32 v8, 1, v8 81; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v8 82; SDAG-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] 83; SDAG-NEXT: v_cndmask_b32_e64 v18, v17, 0, s[4:5] 84; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1 85; SDAG-NEXT: v_cndmask_b32_e64 v22, v16, 0, s[4:5] 86; SDAG-NEXT: v_cndmask_b32_e64 v19, v21, 0, s[4:5] 87; SDAG-NEXT: s_and_b64 s[8:9], s[6:7], vcc 88; SDAG-NEXT: v_cndmask_b32_e64 v23, v20, 0, s[4:5] 89; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[8:9] 90; SDAG-NEXT: s_cbranch_execz .LBB0_6 91; SDAG-NEXT: ; %bb.1: ; %udiv-bb15 92; SDAG-NEXT: v_add_i32_e32 v30, vcc, 1, v2 93; SDAG-NEXT: v_sub_i32_e64 v18, s[4:5], 63, v2 94; SDAG-NEXT: v_mov_b32_e32 v8, 0 95; SDAG-NEXT: v_mov_b32_e32 v9, 0 96; SDAG-NEXT: v_addc_u32_e32 v31, vcc, 0, v3, vcc 97; SDAG-NEXT: v_lshl_b64 v[18:19], v[20:21], v18 98; SDAG-NEXT: v_addc_u32_e32 v32, vcc, 0, v10, vcc 99; SDAG-NEXT: v_addc_u32_e32 v33, vcc, 0, v11, vcc 100; SDAG-NEXT: v_or_b32_e32 v10, v30, v32 101; SDAG-NEXT: v_sub_i32_e32 v34, vcc, 0x7f, v2 102; SDAG-NEXT: v_or_b32_e32 v11, v31, v33 103; SDAG-NEXT: v_lshl_b64 v[2:3], v[16:17], v34 104; SDAG-NEXT: v_sub_i32_e32 v35, vcc, 64, v34 105; SDAG-NEXT: v_lshl_b64 v[22:23], v[20:21], v34 106; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] 107; SDAG-NEXT: v_lshr_b64 v[10:11], v[20:21], v35 108; SDAG-NEXT: v_or_b32_e32 v3, v3, v11 109; SDAG-NEXT: v_or_b32_e32 v2, v2, v10 110; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v34 111; SDAG-NEXT: v_cndmask_b32_e64 v3, v19, v3, s[4:5] 112; SDAG-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5] 113; SDAG-NEXT: v_cndmask_b32_e64 v19, 0, v23, s[4:5] 114; SDAG-NEXT: v_cndmask_b32_e64 v18, 0, v22, s[4:5] 115; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v34 116; SDAG-NEXT: v_cndmask_b32_e64 v3, v3, v17, s[4:5] 117; SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v16, s[4:5] 118; SDAG-NEXT: v_mov_b32_e32 v10, 0 119; SDAG-NEXT: v_mov_b32_e32 v11, 0 120; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc 121; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5] 122; SDAG-NEXT: s_cbranch_execz .LBB0_5 123; SDAG-NEXT: ; %bb.2: ; %udiv-preheader4 124; SDAG-NEXT: v_lshr_b64 v[8:9], v[20:21], v30 125; SDAG-NEXT: v_sub_i32_e32 v10, vcc, 64, v30 126; SDAG-NEXT: v_lshl_b64 v[10:11], v[16:17], v10 127; SDAG-NEXT: v_or_b32_e32 v11, v9, v11 128; SDAG-NEXT: v_or_b32_e32 v10, v8, v10 129; SDAG-NEXT: v_cmp_gt_u32_e32 vcc, 64, v30 130; SDAG-NEXT: v_subrev_i32_e64 v8, s[4:5], 64, v30 131; SDAG-NEXT: v_lshr_b64 v[8:9], v[16:17], v8 132; SDAG-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc 133; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v30 134; SDAG-NEXT: v_cndmask_b32_e64 v21, v9, v21, s[4:5] 135; SDAG-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc 136; SDAG-NEXT: v_cndmask_b32_e64 v20, v8, v20, s[4:5] 137; SDAG-NEXT: v_lshr_b64 v[8:9], v[16:17], v30 138; SDAG-NEXT: v_cndmask_b32_e32 v23, 0, v9, vcc 139; SDAG-NEXT: v_cndmask_b32_e32 v22, 0, v8, vcc 140; SDAG-NEXT: v_add_i32_e32 v34, vcc, -1, v29 141; SDAG-NEXT: v_addc_u32_e32 v35, vcc, -1, v28, vcc 142; SDAG-NEXT: v_addc_u32_e32 v36, vcc, -1, v0, vcc 143; SDAG-NEXT: v_addc_u32_e32 v37, vcc, -1, v1, vcc 144; SDAG-NEXT: s_mov_b64 s[4:5], 0 145; SDAG-NEXT: v_mov_b32_e32 v16, 0 146; SDAG-NEXT: v_mov_b32_e32 v17, 0 147; SDAG-NEXT: v_mov_b32_e32 v10, 0 148; SDAG-NEXT: v_mov_b32_e32 v11, 0 149; SDAG-NEXT: v_mov_b32_e32 v9, 0 150; SDAG-NEXT: .LBB0_3: ; %udiv-do-while3 151; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 152; SDAG-NEXT: v_lshrrev_b32_e32 v8, 31, v19 153; SDAG-NEXT: v_lshl_b64 v[18:19], v[18:19], 1 154; SDAG-NEXT: v_lshl_b64 v[22:23], v[22:23], 1 155; SDAG-NEXT: v_lshrrev_b32_e32 v38, 31, v21 156; SDAG-NEXT: v_lshl_b64 v[20:21], v[20:21], 1 157; SDAG-NEXT: v_lshrrev_b32_e32 v39, 31, v3 158; SDAG-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 159; SDAG-NEXT: v_or_b32_e32 v19, v17, v19 160; SDAG-NEXT: v_or_b32_e32 v18, v16, v18 161; SDAG-NEXT: v_or_b32_e32 v16, v22, v38 162; SDAG-NEXT: v_or_b32_e32 v17, v20, v39 163; SDAG-NEXT: v_or_b32_e32 v2, v2, v8 164; SDAG-NEXT: v_sub_i32_e32 v8, vcc, v34, v17 165; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v35, v21, vcc 166; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v36, v16, vcc 167; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v37, v23, vcc 168; SDAG-NEXT: v_ashrrev_i32_e32 v8, 31, v8 169; SDAG-NEXT: v_and_b32_e32 v20, v8, v29 170; SDAG-NEXT: v_and_b32_e32 v22, v8, v28 171; SDAG-NEXT: v_and_b32_e32 v38, v8, v0 172; SDAG-NEXT: v_and_b32_e32 v39, v8, v1 173; SDAG-NEXT: v_and_b32_e32 v8, 1, v8 174; SDAG-NEXT: v_sub_i32_e32 v20, vcc, v17, v20 175; SDAG-NEXT: v_subb_u32_e32 v21, vcc, v21, v22, vcc 176; SDAG-NEXT: v_subb_u32_e32 v22, vcc, v16, v38, vcc 177; SDAG-NEXT: v_subb_u32_e32 v23, vcc, v23, v39, vcc 178; SDAG-NEXT: v_add_i32_e32 v30, vcc, -1, v30 179; SDAG-NEXT: v_addc_u32_e32 v31, vcc, -1, v31, vcc 180; SDAG-NEXT: v_addc_u32_e32 v32, vcc, -1, v32, vcc 181; SDAG-NEXT: v_addc_u32_e32 v33, vcc, -1, v33, vcc 182; SDAG-NEXT: v_or_b32_e32 v16, v30, v32 183; SDAG-NEXT: v_or_b32_e32 v17, v31, v33 184; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17] 185; SDAG-NEXT: v_or_b32_e32 v3, v11, v3 186; SDAG-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 187; SDAG-NEXT: v_or_b32_e32 v2, v10, v2 188; SDAG-NEXT: v_mov_b32_e32 v17, v9 189; SDAG-NEXT: v_mov_b32_e32 v16, v8 190; SDAG-NEXT: s_andn2_b64 exec, exec, s[4:5] 191; SDAG-NEXT: s_cbranch_execnz .LBB0_3 192; SDAG-NEXT: ; %bb.4: ; %Flow13 193; SDAG-NEXT: s_or_b64 exec, exec, s[4:5] 194; SDAG-NEXT: .LBB0_5: ; %Flow14 195; SDAG-NEXT: s_or_b64 exec, exec, s[8:9] 196; SDAG-NEXT: v_lshl_b64 v[0:1], v[2:3], 1 197; SDAG-NEXT: v_lshrrev_b32_e32 v16, 31, v19 198; SDAG-NEXT: v_lshl_b64 v[2:3], v[18:19], 1 199; SDAG-NEXT: v_or_b32_e32 v0, v0, v16 200; SDAG-NEXT: v_or_b32_e32 v18, v11, v1 201; SDAG-NEXT: v_or_b32_e32 v19, v9, v3 202; SDAG-NEXT: v_or_b32_e32 v22, v10, v0 203; SDAG-NEXT: v_or_b32_e32 v23, v8, v2 204; SDAG-NEXT: .LBB0_6: ; %Flow16 205; SDAG-NEXT: s_or_b64 exec, exec, s[6:7] 206; SDAG-NEXT: v_ashrrev_i32_e32 v16, 31, v7 207; SDAG-NEXT: v_ashrrev_i32_e32 v17, 31, v15 208; SDAG-NEXT: v_sub_i32_e32 v0, vcc, 0, v4 209; SDAG-NEXT: v_mov_b32_e32 v8, 0 210; SDAG-NEXT: s_mov_b64 s[10:11], 0x7f 211; SDAG-NEXT: v_mov_b32_e32 v20, v16 212; SDAG-NEXT: v_mov_b32_e32 v21, v17 213; SDAG-NEXT: v_subb_u32_e32 v1, vcc, 0, v5, vcc 214; SDAG-NEXT: v_subb_u32_e32 v9, vcc, 0, v6, vcc 215; SDAG-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[6:7] 216; SDAG-NEXT: v_cndmask_b32_e64 v3, v5, v1, s[4:5] 217; SDAG-NEXT: v_cndmask_b32_e64 v2, v4, v0, s[4:5] 218; SDAG-NEXT: v_subb_u32_e32 v0, vcc, 0, v7, vcc 219; SDAG-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[4:5] 220; SDAG-NEXT: v_ffbh_u32_e32 v1, v2 221; SDAG-NEXT: v_ffbh_u32_e32 v4, v3 222; SDAG-NEXT: v_cndmask_b32_e64 v7, v7, v0, s[4:5] 223; SDAG-NEXT: v_sub_i32_e32 v5, vcc, 0, v12 224; SDAG-NEXT: v_or_b32_e32 v0, v2, v6 225; SDAG-NEXT: v_ffbh_u32_e32 v9, v6 226; SDAG-NEXT: v_add_i32_e64 v10, s[4:5], 32, v1 227; SDAG-NEXT: v_subb_u32_e32 v11, vcc, 0, v13, vcc 228; SDAG-NEXT: v_or_b32_e32 v1, v3, v7 229; SDAG-NEXT: v_add_i32_e64 v9, s[4:5], 32, v9 230; SDAG-NEXT: v_ffbh_u32_e32 v30, v7 231; SDAG-NEXT: v_min_u32_e32 v4, v10, v4 232; SDAG-NEXT: v_subb_u32_e32 v10, vcc, 0, v14, vcc 233; SDAG-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[14:15] 234; SDAG-NEXT: v_cndmask_b32_e64 v28, v13, v11, s[4:5] 235; SDAG-NEXT: v_cndmask_b32_e64 v29, v12, v5, s[4:5] 236; SDAG-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[0:1] 237; SDAG-NEXT: v_min_u32_e32 v1, v9, v30 238; SDAG-NEXT: v_add_i32_e64 v4, s[8:9], 64, v4 239; SDAG-NEXT: v_addc_u32_e64 v5, s[8:9], 0, 0, s[8:9] 240; SDAG-NEXT: v_subb_u32_e32 v9, vcc, 0, v15, vcc 241; SDAG-NEXT: v_cndmask_b32_e64 v0, v14, v10, s[4:5] 242; SDAG-NEXT: v_ffbh_u32_e32 v10, v29 243; SDAG-NEXT: v_ffbh_u32_e32 v11, v28 244; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] 245; SDAG-NEXT: v_cndmask_b32_e64 v12, v5, 0, vcc 246; SDAG-NEXT: v_cndmask_b32_e32 v13, v4, v1, vcc 247; SDAG-NEXT: v_cndmask_b32_e64 v1, v15, v9, s[4:5] 248; SDAG-NEXT: v_or_b32_e32 v4, v29, v0 249; SDAG-NEXT: v_ffbh_u32_e32 v9, v0 250; SDAG-NEXT: v_add_i32_e32 v10, vcc, 32, v10 251; SDAG-NEXT: v_or_b32_e32 v5, v28, v1 252; SDAG-NEXT: v_add_i32_e32 v9, vcc, 32, v9 253; SDAG-NEXT: v_ffbh_u32_e32 v14, v1 254; SDAG-NEXT: v_min_u32_e32 v10, v10, v11 255; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] 256; SDAG-NEXT: v_min_u32_e32 v4, v9, v14 257; SDAG-NEXT: v_add_i32_e64 v5, s[4:5], 64, v10 258; SDAG-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] 259; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[6:7] 260; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] 261; SDAG-NEXT: v_cndmask_b32_e64 v9, v9, 0, vcc 262; SDAG-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc 263; SDAG-NEXT: v_sub_i32_e32 v4, vcc, v4, v13 264; SDAG-NEXT: v_subb_u32_e32 v5, vcc, v9, v12, vcc 265; SDAG-NEXT: v_xor_b32_e32 v9, 0x7f, v4 266; SDAG-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v8, vcc 267; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[10:11], v[4:5] 268; SDAG-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 269; SDAG-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v8, vcc 270; SDAG-NEXT: v_or_b32_e32 v8, v9, v10 271; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] 272; SDAG-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 273; SDAG-NEXT: v_or_b32_e32 v9, v5, v11 274; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] 275; SDAG-NEXT: v_cndmask_b32_e32 v12, v13, v12, vcc 276; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] 277; SDAG-NEXT: v_and_b32_e32 v8, 1, v12 278; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v8 279; SDAG-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] 280; SDAG-NEXT: v_cndmask_b32_e64 v13, v7, 0, s[4:5] 281; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1 282; SDAG-NEXT: v_cndmask_b32_e64 v9, v6, 0, s[4:5] 283; SDAG-NEXT: v_cndmask_b32_e64 v14, v3, 0, s[4:5] 284; SDAG-NEXT: v_cndmask_b32_e64 v8, v2, 0, s[4:5] 285; SDAG-NEXT: s_and_b64 s[4:5], s[6:7], vcc 286; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] 287; SDAG-NEXT: s_cbranch_execz .LBB0_12 288; SDAG-NEXT: ; %bb.7: ; %udiv-bb1 289; SDAG-NEXT: v_add_i32_e32 v30, vcc, 1, v4 290; SDAG-NEXT: v_sub_i32_e64 v12, s[4:5], 63, v4 291; SDAG-NEXT: v_mov_b32_e32 v8, 0 292; SDAG-NEXT: v_mov_b32_e32 v9, 0 293; SDAG-NEXT: v_addc_u32_e32 v31, vcc, 0, v5, vcc 294; SDAG-NEXT: v_lshl_b64 v[12:13], v[2:3], v12 295; SDAG-NEXT: v_addc_u32_e32 v32, vcc, 0, v10, vcc 296; SDAG-NEXT: v_addc_u32_e32 v33, vcc, 0, v11, vcc 297; SDAG-NEXT: v_or_b32_e32 v10, v30, v32 298; SDAG-NEXT: v_sub_i32_e32 v34, vcc, 0x7f, v4 299; SDAG-NEXT: v_or_b32_e32 v11, v31, v33 300; SDAG-NEXT: v_lshl_b64 v[4:5], v[6:7], v34 301; SDAG-NEXT: v_sub_i32_e32 v35, vcc, 64, v34 302; SDAG-NEXT: v_lshl_b64 v[14:15], v[2:3], v34 303; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] 304; SDAG-NEXT: v_lshr_b64 v[10:11], v[2:3], v35 305; SDAG-NEXT: v_or_b32_e32 v5, v5, v11 306; SDAG-NEXT: v_or_b32_e32 v4, v4, v10 307; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v34 308; SDAG-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[4:5] 309; SDAG-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[4:5] 310; SDAG-NEXT: v_cndmask_b32_e64 v11, 0, v15, s[4:5] 311; SDAG-NEXT: v_cndmask_b32_e64 v10, 0, v14, s[4:5] 312; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v34 313; SDAG-NEXT: v_cndmask_b32_e64 v5, v5, v7, s[4:5] 314; SDAG-NEXT: v_cndmask_b32_e64 v4, v4, v6, s[4:5] 315; SDAG-NEXT: v_mov_b32_e32 v12, 0 316; SDAG-NEXT: v_mov_b32_e32 v13, 0 317; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc 318; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5] 319; SDAG-NEXT: s_cbranch_execz .LBB0_11 320; SDAG-NEXT: ; %bb.8: ; %udiv-preheader 321; SDAG-NEXT: v_lshr_b64 v[8:9], v[2:3], v30 322; SDAG-NEXT: v_sub_i32_e32 v35, vcc, 64, v30 323; SDAG-NEXT: v_subrev_i32_e32 v36, vcc, 64, v30 324; SDAG-NEXT: v_lshr_b64 v[37:38], v[6:7], v30 325; SDAG-NEXT: v_add_i32_e32 v34, vcc, -1, v29 326; SDAG-NEXT: s_mov_b64 s[10:11], 0 327; SDAG-NEXT: v_mov_b32_e32 v14, 0 328; SDAG-NEXT: v_mov_b32_e32 v15, 0 329; SDAG-NEXT: v_mov_b32_e32 v12, 0 330; SDAG-NEXT: v_mov_b32_e32 v13, 0 331; SDAG-NEXT: v_lshl_b64 v[48:49], v[6:7], v35 332; SDAG-NEXT: v_lshr_b64 v[6:7], v[6:7], v36 333; SDAG-NEXT: v_addc_u32_e32 v35, vcc, -1, v28, vcc 334; SDAG-NEXT: v_or_b32_e32 v9, v9, v49 335; SDAG-NEXT: v_or_b32_e32 v8, v8, v48 336; SDAG-NEXT: v_addc_u32_e32 v36, vcc, -1, v0, vcc 337; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v30 338; SDAG-NEXT: v_cndmask_b32_e64 v9, v7, v9, s[4:5] 339; SDAG-NEXT: v_cndmask_b32_e64 v8, v6, v8, s[4:5] 340; SDAG-NEXT: v_cndmask_b32_e64 v7, 0, v38, s[4:5] 341; SDAG-NEXT: v_cndmask_b32_e64 v6, 0, v37, s[4:5] 342; SDAG-NEXT: v_addc_u32_e32 v37, vcc, -1, v1, vcc 343; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v30 344; SDAG-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc 345; SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc 346; SDAG-NEXT: v_mov_b32_e32 v9, 0 347; SDAG-NEXT: .LBB0_9: ; %udiv-do-while 348; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 349; SDAG-NEXT: v_lshl_b64 v[6:7], v[6:7], 1 350; SDAG-NEXT: v_lshrrev_b32_e32 v8, 31, v3 351; SDAG-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 352; SDAG-NEXT: v_lshrrev_b32_e32 v38, 31, v5 353; SDAG-NEXT: v_lshl_b64 v[4:5], v[4:5], 1 354; SDAG-NEXT: v_lshrrev_b32_e32 v39, 31, v11 355; SDAG-NEXT: v_lshl_b64 v[10:11], v[10:11], 1 356; SDAG-NEXT: v_or_b32_e32 v6, v6, v8 357; SDAG-NEXT: v_or_b32_e32 v2, v2, v38 358; SDAG-NEXT: v_or_b32_e32 v4, v4, v39 359; SDAG-NEXT: v_or_b32_e32 v5, v13, v5 360; SDAG-NEXT: v_or_b32_e32 v11, v15, v11 361; SDAG-NEXT: v_sub_i32_e32 v8, vcc, v34, v2 362; SDAG-NEXT: v_or_b32_e32 v4, v12, v4 363; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v35, v3, vcc 364; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v36, v6, vcc 365; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v37, v7, vcc 366; SDAG-NEXT: v_ashrrev_i32_e32 v8, 31, v8 367; SDAG-NEXT: v_and_b32_e32 v15, v8, v29 368; SDAG-NEXT: v_and_b32_e32 v38, v8, v28 369; SDAG-NEXT: v_and_b32_e32 v39, v8, v0 370; SDAG-NEXT: v_and_b32_e32 v48, v8, v1 371; SDAG-NEXT: v_sub_i32_e32 v2, vcc, v2, v15 372; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v3, v38, vcc 373; SDAG-NEXT: v_subb_u32_e32 v6, vcc, v6, v39, vcc 374; SDAG-NEXT: v_subb_u32_e32 v7, vcc, v7, v48, vcc 375; SDAG-NEXT: v_add_i32_e32 v30, vcc, -1, v30 376; SDAG-NEXT: v_addc_u32_e32 v31, vcc, -1, v31, vcc 377; SDAG-NEXT: v_addc_u32_e32 v32, vcc, -1, v32, vcc 378; SDAG-NEXT: v_addc_u32_e32 v33, vcc, -1, v33, vcc 379; SDAG-NEXT: v_or_b32_e32 v38, v30, v32 380; SDAG-NEXT: v_or_b32_e32 v39, v31, v33 381; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[38:39] 382; SDAG-NEXT: v_and_b32_e32 v8, 1, v8 383; SDAG-NEXT: s_or_b64 s[10:11], vcc, s[10:11] 384; SDAG-NEXT: v_or_b32_e32 v10, v14, v10 385; SDAG-NEXT: v_mov_b32_e32 v15, v9 386; SDAG-NEXT: v_mov_b32_e32 v14, v8 387; SDAG-NEXT: s_andn2_b64 exec, exec, s[10:11] 388; SDAG-NEXT: s_cbranch_execnz .LBB0_9 389; SDAG-NEXT: ; %bb.10: ; %Flow 390; SDAG-NEXT: s_or_b64 exec, exec, s[10:11] 391; SDAG-NEXT: .LBB0_11: ; %Flow11 392; SDAG-NEXT: s_or_b64 exec, exec, s[8:9] 393; SDAG-NEXT: v_lshl_b64 v[0:1], v[4:5], 1 394; SDAG-NEXT: v_lshrrev_b32_e32 v4, 31, v11 395; SDAG-NEXT: v_lshl_b64 v[2:3], v[10:11], 1 396; SDAG-NEXT: v_or_b32_e32 v0, v0, v4 397; SDAG-NEXT: v_or_b32_e32 v13, v13, v1 398; SDAG-NEXT: v_or_b32_e32 v14, v9, v3 399; SDAG-NEXT: v_or_b32_e32 v9, v12, v0 400; SDAG-NEXT: v_or_b32_e32 v8, v8, v2 401; SDAG-NEXT: .LBB0_12: ; %Flow12 402; SDAG-NEXT: s_or_b64 exec, exec, s[6:7] 403; SDAG-NEXT: v_xor_b32_e32 v3, v27, v26 404; SDAG-NEXT: v_xor_b32_e32 v2, v25, v24 405; SDAG-NEXT: v_xor_b32_e32 v7, v21, v20 406; SDAG-NEXT: v_xor_b32_e32 v6, v17, v16 407; SDAG-NEXT: v_xor_b32_e32 v4, v18, v3 408; SDAG-NEXT: v_xor_b32_e32 v5, v22, v2 409; SDAG-NEXT: v_xor_b32_e32 v1, v19, v3 410; SDAG-NEXT: v_xor_b32_e32 v0, v23, v2 411; SDAG-NEXT: v_xor_b32_e32 v10, v13, v7 412; SDAG-NEXT: v_xor_b32_e32 v9, v9, v6 413; SDAG-NEXT: v_xor_b32_e32 v11, v14, v7 414; SDAG-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 415; SDAG-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc 416; SDAG-NEXT: v_subb_u32_e32 v2, vcc, v5, v2, vcc 417; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v4, v3, vcc 418; SDAG-NEXT: v_xor_b32_e32 v4, v8, v6 419; SDAG-NEXT: v_sub_i32_e32 v4, vcc, v4, v6 420; SDAG-NEXT: v_subb_u32_e32 v5, vcc, v11, v7, vcc 421; SDAG-NEXT: v_subb_u32_e32 v6, vcc, v9, v6, vcc 422; SDAG-NEXT: v_subb_u32_e32 v7, vcc, v10, v7, vcc 423; SDAG-NEXT: s_setpc_b64 s[30:31] 424; 425; GISEL-LABEL: v_sdiv_v2i128_vv: 426; GISEL: ; %bb.0: ; %_udiv-special-cases_udiv-special-cases 427; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 428; GISEL-NEXT: v_ashrrev_i32_e32 v24, 31, v3 429; GISEL-NEXT: v_ashrrev_i32_e32 v25, 31, v11 430; GISEL-NEXT: v_mov_b32_e32 v16, 0x7f 431; GISEL-NEXT: v_mov_b32_e32 v17, 0 432; GISEL-NEXT: s_mov_b64 s[8:9], 0 433; GISEL-NEXT: v_xor_b32_e32 v0, v24, v0 434; GISEL-NEXT: v_xor_b32_e32 v1, v24, v1 435; GISEL-NEXT: v_xor_b32_e32 v2, v24, v2 436; GISEL-NEXT: v_xor_b32_e32 v3, v24, v3 437; GISEL-NEXT: v_xor_b32_e32 v8, v25, v8 438; GISEL-NEXT: v_xor_b32_e32 v9, v25, v9 439; GISEL-NEXT: v_xor_b32_e32 v10, v25, v10 440; GISEL-NEXT: v_xor_b32_e32 v11, v25, v11 441; GISEL-NEXT: v_sub_i32_e32 v18, vcc, v0, v24 442; GISEL-NEXT: v_subb_u32_e32 v19, vcc, v1, v24, vcc 443; GISEL-NEXT: v_sub_i32_e64 v26, s[4:5], v8, v25 444; GISEL-NEXT: v_subb_u32_e64 v27, s[4:5], v9, v25, s[4:5] 445; GISEL-NEXT: v_subb_u32_e32 v20, vcc, v2, v24, vcc 446; GISEL-NEXT: v_subb_u32_e32 v21, vcc, v3, v24, vcc 447; GISEL-NEXT: v_subb_u32_e64 v10, vcc, v10, v25, s[4:5] 448; GISEL-NEXT: v_subb_u32_e32 v11, vcc, v11, v25, vcc 449; GISEL-NEXT: v_ffbh_u32_e32 v8, v27 450; GISEL-NEXT: v_ffbh_u32_e32 v9, v26 451; GISEL-NEXT: v_ffbh_u32_e32 v22, v18 452; GISEL-NEXT: v_ffbh_u32_e32 v23, v19 453; GISEL-NEXT: v_or_b32_e32 v0, v26, v10 454; GISEL-NEXT: v_or_b32_e32 v1, v27, v11 455; GISEL-NEXT: v_or_b32_e32 v2, v18, v20 456; GISEL-NEXT: v_or_b32_e32 v3, v19, v21 457; GISEL-NEXT: v_add_i32_e32 v9, vcc, 32, v9 458; GISEL-NEXT: v_add_i32_e32 v22, vcc, 32, v22 459; GISEL-NEXT: v_ffbh_u32_e32 v28, v10 460; GISEL-NEXT: v_ffbh_u32_e32 v29, v11 461; GISEL-NEXT: v_ffbh_u32_e32 v30, v20 462; GISEL-NEXT: v_ffbh_u32_e32 v31, v21 463; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] 464; GISEL-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[2:3] 465; GISEL-NEXT: v_min_u32_e32 v0, v8, v9 466; GISEL-NEXT: v_min_u32_e32 v1, v23, v22 467; GISEL-NEXT: v_add_i32_e64 v2, s[6:7], 32, v28 468; GISEL-NEXT: v_add_i32_e64 v3, s[6:7], 32, v30 469; GISEL-NEXT: v_min_u32_e32 v2, v29, v2 470; GISEL-NEXT: v_min_u32_e32 v3, v31, v3 471; GISEL-NEXT: v_add_i32_e64 v0, s[6:7], 64, v0 472; GISEL-NEXT: v_add_i32_e64 v1, s[6:7], 64, v1 473; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 474; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, s[4:5] 475; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] 476; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 477; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[20:21] 478; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 479; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1 480; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, vcc 481; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5] 482; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5] 483; GISEL-NEXT: v_xor_b32_e32 v8, 0x7f, v2 484; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[16:17] 485; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 486; GISEL-NEXT: v_or_b32_e32 v8, v8, v0 487; GISEL-NEXT: v_or_b32_e32 v9, v3, v1 488; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1] 489; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 490; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] 491; GISEL-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc 492; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] 493; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 494; GISEL-NEXT: v_or_b32_e32 v9, v22, v16 495; GISEL-NEXT: v_or_b32_e32 v8, v9, v8 496; GISEL-NEXT: v_and_b32_e32 v9, 1, v9 497; GISEL-NEXT: v_and_b32_e32 v8, 1, v8 498; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 499; GISEL-NEXT: v_cndmask_b32_e64 v22, v18, 0, vcc 500; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v8 501; GISEL-NEXT: v_cndmask_b32_e64 v8, v20, 0, vcc 502; GISEL-NEXT: v_cndmask_b32_e64 v9, v21, 0, vcc 503; GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], -1 504; GISEL-NEXT: v_cndmask_b32_e64 v23, v19, 0, vcc 505; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] 506; GISEL-NEXT: s_cbranch_execz .LBB0_6 507; GISEL-NEXT: ; %bb.1: ; %udiv-bb15 508; GISEL-NEXT: v_add_i32_e32 v28, vcc, 1, v2 509; GISEL-NEXT: v_addc_u32_e64 v29, s[4:5], 0, v3, vcc 510; GISEL-NEXT: v_sub_i32_e32 v32, vcc, 0x7f, v2 511; GISEL-NEXT: v_not_b32_e32 v2, 63 512; GISEL-NEXT: v_addc_u32_e64 v30, vcc, 0, v0, s[4:5] 513; GISEL-NEXT: v_addc_u32_e32 v31, vcc, 0, v1, vcc 514; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v32, v2 515; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], 64, v32 516; GISEL-NEXT: v_lshl_b64 v[0:1], v[18:19], v32 517; GISEL-NEXT: v_lshl_b64 v[2:3], v[20:21], v32 518; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 519; GISEL-NEXT: v_lshr_b64 v[8:9], v[18:19], v8 520; GISEL-NEXT: v_lshl_b64 v[22:23], v[18:19], v16 521; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v32 522; GISEL-NEXT: v_cndmask_b32_e32 v16, 0, v0, vcc 523; GISEL-NEXT: v_cndmask_b32_e32 v17, 0, v1, vcc 524; GISEL-NEXT: v_or_b32_e32 v0, v8, v2 525; GISEL-NEXT: v_or_b32_e32 v1, v9, v3 526; GISEL-NEXT: v_cndmask_b32_e32 v0, v22, v0, vcc 527; GISEL-NEXT: v_cndmask_b32_e32 v1, v23, v1, vcc 528; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v32 529; GISEL-NEXT: v_cndmask_b32_e32 v8, v0, v20, vcc 530; GISEL-NEXT: v_cndmask_b32_e32 v9, v1, v21, vcc 531; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9] 532; GISEL-NEXT: v_mov_b32_e32 v0, s8 533; GISEL-NEXT: v_mov_b32_e32 v1, s9 534; GISEL-NEXT: v_mov_b32_e32 v2, s10 535; GISEL-NEXT: v_mov_b32_e32 v3, s11 536; GISEL-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] 537; GISEL-NEXT: s_xor_b64 s[12:13], exec, s[8:9] 538; GISEL-NEXT: s_cbranch_execz .LBB0_5 539; GISEL-NEXT: ; %bb.2: ; %udiv-preheader4 540; GISEL-NEXT: v_add_i32_e32 v32, vcc, 0xffffffc0, v28 541; GISEL-NEXT: v_sub_i32_e32 v22, vcc, 64, v28 542; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v28 543; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v28 544; GISEL-NEXT: v_lshr_b64 v[0:1], v[20:21], v28 545; GISEL-NEXT: v_lshr_b64 v[2:3], v[18:19], v28 546; GISEL-NEXT: v_lshl_b64 v[22:23], v[20:21], v22 547; GISEL-NEXT: v_or_b32_e32 v22, v2, v22 548; GISEL-NEXT: v_or_b32_e32 v23, v3, v23 549; GISEL-NEXT: s_mov_b64 s[8:9], 0 550; GISEL-NEXT: v_lshr_b64 v[2:3], v[20:21], v32 551; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v22, vcc 552; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v23, vcc 553; GISEL-NEXT: v_cndmask_b32_e64 v18, v2, v18, s[4:5] 554; GISEL-NEXT: v_cndmask_b32_e64 v19, v3, v19, s[4:5] 555; GISEL-NEXT: v_cndmask_b32_e32 v20, 0, v0, vcc 556; GISEL-NEXT: v_cndmask_b32_e32 v21, 0, v1, vcc 557; GISEL-NEXT: v_add_i32_e32 v32, vcc, -1, v26 558; GISEL-NEXT: v_addc_u32_e32 v33, vcc, -1, v27, vcc 559; GISEL-NEXT: v_addc_u32_e32 v34, vcc, -1, v10, vcc 560; GISEL-NEXT: v_addc_u32_e32 v35, vcc, -1, v11, vcc 561; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9] 562; GISEL-NEXT: v_mov_b32_e32 v23, 0 563; GISEL-NEXT: v_mov_b32_e32 v0, s8 564; GISEL-NEXT: v_mov_b32_e32 v1, s9 565; GISEL-NEXT: v_mov_b32_e32 v2, s10 566; GISEL-NEXT: v_mov_b32_e32 v3, s11 567; GISEL-NEXT: .LBB0_3: ; %udiv-do-while3 568; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 569; GISEL-NEXT: v_lshrrev_b32_e32 v36, 31, v17 570; GISEL-NEXT: v_lshl_b64 v[2:3], v[16:17], 1 571; GISEL-NEXT: v_or_b32_e32 v16, v0, v2 572; GISEL-NEXT: v_or_b32_e32 v17, v1, v3 573; GISEL-NEXT: v_lshrrev_b32_e32 v22, 31, v19 574; GISEL-NEXT: v_lshl_b64 v[0:1], v[18:19], 1 575; GISEL-NEXT: v_lshl_b64 v[2:3], v[20:21], 1 576; GISEL-NEXT: v_or_b32_e32 v2, v2, v22 577; GISEL-NEXT: v_lshrrev_b32_e32 v18, 31, v9 578; GISEL-NEXT: v_or_b32_e32 v0, v0, v18 579; GISEL-NEXT: v_sub_i32_e32 v18, vcc, v32, v0 580; GISEL-NEXT: v_subb_u32_e32 v18, vcc, v33, v1, vcc 581; GISEL-NEXT: v_subb_u32_e32 v18, vcc, v34, v2, vcc 582; GISEL-NEXT: v_subb_u32_e32 v18, vcc, v35, v3, vcc 583; GISEL-NEXT: v_ashrrev_i32_e32 v22, 31, v18 584; GISEL-NEXT: v_and_b32_e32 v18, v22, v26 585; GISEL-NEXT: v_sub_i32_e32 v18, vcc, v0, v18 586; GISEL-NEXT: v_and_b32_e32 v0, v22, v27 587; GISEL-NEXT: v_subb_u32_e32 v19, vcc, v1, v0, vcc 588; GISEL-NEXT: v_and_b32_e32 v0, v22, v10 589; GISEL-NEXT: v_subb_u32_e32 v20, vcc, v2, v0, vcc 590; GISEL-NEXT: v_and_b32_e32 v0, v22, v11 591; GISEL-NEXT: v_subb_u32_e32 v21, vcc, v3, v0, vcc 592; GISEL-NEXT: v_add_i32_e32 v28, vcc, -1, v28 593; GISEL-NEXT: v_addc_u32_e32 v29, vcc, -1, v29, vcc 594; GISEL-NEXT: v_addc_u32_e32 v30, vcc, -1, v30, vcc 595; GISEL-NEXT: v_addc_u32_e32 v31, vcc, -1, v31, vcc 596; GISEL-NEXT: v_or_b32_e32 v0, v28, v30 597; GISEL-NEXT: v_or_b32_e32 v1, v29, v31 598; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] 599; GISEL-NEXT: v_and_b32_e32 v22, 1, v22 600; GISEL-NEXT: v_lshl_b64 v[8:9], v[8:9], 1 601; GISEL-NEXT: s_or_b64 s[8:9], vcc, s[8:9] 602; GISEL-NEXT: v_or_b32_e32 v8, v8, v36 603; GISEL-NEXT: v_mov_b32_e32 v0, v22 604; GISEL-NEXT: v_mov_b32_e32 v1, v23 605; GISEL-NEXT: s_andn2_b64 exec, exec, s[8:9] 606; GISEL-NEXT: s_cbranch_execnz .LBB0_3 607; GISEL-NEXT: ; %bb.4: ; %Flow13 608; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] 609; GISEL-NEXT: .LBB0_5: ; %Flow14 610; GISEL-NEXT: s_or_b64 exec, exec, s[12:13] 611; GISEL-NEXT: v_lshl_b64 v[2:3], v[16:17], 1 612; GISEL-NEXT: v_lshl_b64 v[8:9], v[8:9], 1 613; GISEL-NEXT: v_lshrrev_b32_e32 v10, 31, v17 614; GISEL-NEXT: v_or_b32_e32 v8, v8, v10 615; GISEL-NEXT: v_or_b32_e32 v22, v0, v2 616; GISEL-NEXT: v_or_b32_e32 v23, v1, v3 617; GISEL-NEXT: .LBB0_6: ; %Flow16 618; GISEL-NEXT: s_or_b64 exec, exec, s[6:7] 619; GISEL-NEXT: s_mov_b64 s[8:9], 0 620; GISEL-NEXT: v_ashrrev_i32_e32 v18, 31, v7 621; GISEL-NEXT: v_ashrrev_i32_e32 v19, 31, v15 622; GISEL-NEXT: v_mov_b32_e32 v10, 0x7f 623; GISEL-NEXT: v_mov_b32_e32 v11, 0 624; GISEL-NEXT: v_xor_b32_e32 v0, v18, v4 625; GISEL-NEXT: v_xor_b32_e32 v1, v18, v5 626; GISEL-NEXT: v_xor_b32_e32 v2, v18, v6 627; GISEL-NEXT: v_xor_b32_e32 v3, v18, v7 628; GISEL-NEXT: v_xor_b32_e32 v4, v19, v12 629; GISEL-NEXT: v_xor_b32_e32 v5, v19, v13 630; GISEL-NEXT: v_xor_b32_e32 v14, v19, v14 631; GISEL-NEXT: v_xor_b32_e32 v15, v19, v15 632; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v18 633; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v1, v18, vcc 634; GISEL-NEXT: v_sub_i32_e64 v20, s[4:5], v4, v19 635; GISEL-NEXT: v_subb_u32_e64 v21, s[4:5], v5, v19, s[4:5] 636; GISEL-NEXT: v_subb_u32_e32 v12, vcc, v2, v18, vcc 637; GISEL-NEXT: v_subb_u32_e32 v13, vcc, v3, v18, vcc 638; GISEL-NEXT: v_subb_u32_e64 v4, vcc, v14, v19, s[4:5] 639; GISEL-NEXT: v_subb_u32_e32 v5, vcc, v15, v19, vcc 640; GISEL-NEXT: v_ffbh_u32_e32 v14, v21 641; GISEL-NEXT: v_ffbh_u32_e32 v15, v20 642; GISEL-NEXT: v_ffbh_u32_e32 v16, v7 643; GISEL-NEXT: v_ffbh_u32_e32 v17, v6 644; GISEL-NEXT: v_or_b32_e32 v0, v20, v4 645; GISEL-NEXT: v_or_b32_e32 v1, v21, v5 646; GISEL-NEXT: v_or_b32_e32 v2, v6, v12 647; GISEL-NEXT: v_or_b32_e32 v3, v7, v13 648; GISEL-NEXT: v_add_i32_e32 v15, vcc, 32, v15 649; GISEL-NEXT: v_ffbh_u32_e32 v26, v5 650; GISEL-NEXT: v_ffbh_u32_e32 v27, v4 651; GISEL-NEXT: v_add_i32_e32 v17, vcc, 32, v17 652; GISEL-NEXT: v_ffbh_u32_e32 v28, v13 653; GISEL-NEXT: v_ffbh_u32_e32 v29, v12 654; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] 655; GISEL-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[2:3] 656; GISEL-NEXT: v_min_u32_e32 v0, v14, v15 657; GISEL-NEXT: v_add_i32_e64 v1, s[6:7], 32, v27 658; GISEL-NEXT: v_min_u32_e32 v2, v16, v17 659; GISEL-NEXT: v_add_i32_e64 v3, s[6:7], 32, v29 660; GISEL-NEXT: v_add_i32_e64 v0, s[6:7], 64, v0 661; GISEL-NEXT: v_min_u32_e32 v1, v26, v1 662; GISEL-NEXT: v_add_i32_e64 v2, s[6:7], 64, v2 663; GISEL-NEXT: v_min_u32_e32 v3, v28, v3 664; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 665; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 666; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] 667; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 668; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[12:13] 669; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc 670; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1 671; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, vcc 672; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5] 673; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5] 674; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[10:11] 675; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 676; GISEL-NEXT: v_xor_b32_e32 v10, 0x7f, v2 677; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1] 678; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 679; GISEL-NEXT: v_or_b32_e32 v10, v10, v0 680; GISEL-NEXT: v_or_b32_e32 v11, v3, v1 681; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] 682; GISEL-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc 683; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] 684; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 685; GISEL-NEXT: v_or_b32_e32 v11, v14, v15 686; GISEL-NEXT: v_and_b32_e32 v14, 1, v11 687; GISEL-NEXT: v_or_b32_e32 v10, v11, v10 688; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 689; GISEL-NEXT: v_cndmask_b32_e64 v14, v6, 0, vcc 690; GISEL-NEXT: v_and_b32_e32 v16, 1, v10 691; GISEL-NEXT: v_cndmask_b32_e64 v15, v7, 0, vcc 692; GISEL-NEXT: v_cndmask_b32_e64 v10, v12, 0, vcc 693; GISEL-NEXT: v_cndmask_b32_e64 v11, v13, 0, vcc 694; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 695; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 696; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5] 697; GISEL-NEXT: s_cbranch_execz .LBB0_12 698; GISEL-NEXT: ; %bb.7: ; %udiv-bb1 699; GISEL-NEXT: v_add_i32_e32 v26, vcc, 1, v2 700; GISEL-NEXT: v_addc_u32_e64 v27, s[4:5], 0, v3, vcc 701; GISEL-NEXT: v_sub_i32_e32 v30, vcc, 0x7f, v2 702; GISEL-NEXT: v_not_b32_e32 v2, 63 703; GISEL-NEXT: v_addc_u32_e64 v28, vcc, 0, v0, s[4:5] 704; GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc 705; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v30, v2 706; GISEL-NEXT: v_sub_i32_e64 v10, s[4:5], 64, v30 707; GISEL-NEXT: v_lshl_b64 v[0:1], v[6:7], v30 708; GISEL-NEXT: v_lshl_b64 v[2:3], v[12:13], v30 709; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 710; GISEL-NEXT: v_lshr_b64 v[10:11], v[6:7], v10 711; GISEL-NEXT: v_lshl_b64 v[16:17], v[6:7], v14 712; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v30 713; GISEL-NEXT: v_cndmask_b32_e32 v14, 0, v0, vcc 714; GISEL-NEXT: v_cndmask_b32_e32 v15, 0, v1, vcc 715; GISEL-NEXT: v_or_b32_e32 v0, v10, v2 716; GISEL-NEXT: v_or_b32_e32 v1, v11, v3 717; GISEL-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc 718; GISEL-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc 719; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v30 720; GISEL-NEXT: v_cndmask_b32_e32 v10, v0, v12, vcc 721; GISEL-NEXT: v_cndmask_b32_e32 v11, v1, v13, vcc 722; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9] 723; GISEL-NEXT: v_mov_b32_e32 v0, s8 724; GISEL-NEXT: v_mov_b32_e32 v1, s9 725; GISEL-NEXT: v_mov_b32_e32 v2, s10 726; GISEL-NEXT: v_mov_b32_e32 v3, s11 727; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] 728; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7] 729; GISEL-NEXT: s_cbranch_execz .LBB0_11 730; GISEL-NEXT: ; %bb.8: ; %udiv-preheader 731; GISEL-NEXT: v_add_i32_e32 v32, vcc, 0xffffffc0, v26 732; GISEL-NEXT: v_sub_i32_e32 v16, vcc, 64, v26 733; GISEL-NEXT: v_lshr_b64 v[0:1], v[12:13], v26 734; GISEL-NEXT: v_lshr_b64 v[2:3], v[6:7], v26 735; GISEL-NEXT: s_mov_b64 s[4:5], 0 736; GISEL-NEXT: v_add_i32_e32 v30, vcc, -1, v20 737; GISEL-NEXT: v_addc_u32_e32 v31, vcc, -1, v21, vcc 738; GISEL-NEXT: v_lshl_b64 v[16:17], v[12:13], v16 739; GISEL-NEXT: v_lshr_b64 v[12:13], v[12:13], v32 740; GISEL-NEXT: v_addc_u32_e32 v32, vcc, -1, v4, vcc 741; GISEL-NEXT: v_addc_u32_e32 v33, vcc, -1, v5, vcc 742; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] 743; GISEL-NEXT: v_or_b32_e32 v2, v2, v16 744; GISEL-NEXT: v_or_b32_e32 v3, v3, v17 745; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v26 746; GISEL-NEXT: v_cndmask_b32_e32 v2, v12, v2, vcc 747; GISEL-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc 748; GISEL-NEXT: v_cndmask_b32_e32 v16, 0, v0, vcc 749; GISEL-NEXT: v_cndmask_b32_e32 v17, 0, v1, vcc 750; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v26 751; GISEL-NEXT: v_cndmask_b32_e32 v12, v2, v6, vcc 752; GISEL-NEXT: v_cndmask_b32_e32 v13, v3, v7, vcc 753; GISEL-NEXT: v_mov_b32_e32 v7, 0 754; GISEL-NEXT: v_mov_b32_e32 v0, s4 755; GISEL-NEXT: v_mov_b32_e32 v1, s5 756; GISEL-NEXT: v_mov_b32_e32 v2, s6 757; GISEL-NEXT: v_mov_b32_e32 v3, s7 758; GISEL-NEXT: .LBB0_9: ; %udiv-do-while 759; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 760; GISEL-NEXT: v_lshl_b64 v[2:3], v[12:13], 1 761; GISEL-NEXT: v_lshl_b64 v[16:17], v[16:17], 1 762; GISEL-NEXT: v_lshrrev_b32_e32 v6, 31, v13 763; GISEL-NEXT: v_lshrrev_b32_e32 v34, 31, v11 764; GISEL-NEXT: v_lshl_b64 v[12:13], v[14:15], 1 765; GISEL-NEXT: v_lshl_b64 v[10:11], v[10:11], 1 766; GISEL-NEXT: v_lshrrev_b32_e32 v14, 31, v15 767; GISEL-NEXT: v_add_i32_e32 v26, vcc, -1, v26 768; GISEL-NEXT: v_addc_u32_e32 v27, vcc, -1, v27, vcc 769; GISEL-NEXT: v_or_b32_e32 v16, v16, v6 770; GISEL-NEXT: v_or_b32_e32 v2, v2, v34 771; GISEL-NEXT: v_or_b32_e32 v10, v10, v14 772; GISEL-NEXT: v_or_b32_e32 v14, v0, v12 773; GISEL-NEXT: v_or_b32_e32 v15, v1, v13 774; GISEL-NEXT: v_addc_u32_e32 v28, vcc, -1, v28, vcc 775; GISEL-NEXT: v_addc_u32_e32 v29, vcc, -1, v29, vcc 776; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v30, v2 777; GISEL-NEXT: v_subb_u32_e32 v0, vcc, v31, v3, vcc 778; GISEL-NEXT: v_or_b32_e32 v0, v26, v28 779; GISEL-NEXT: v_or_b32_e32 v1, v27, v29 780; GISEL-NEXT: v_subb_u32_e32 v6, vcc, v32, v16, vcc 781; GISEL-NEXT: v_subb_u32_e32 v6, vcc, v33, v17, vcc 782; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] 783; GISEL-NEXT: v_ashrrev_i32_e32 v0, 31, v6 784; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 785; GISEL-NEXT: v_and_b32_e32 v6, 1, v0 786; GISEL-NEXT: v_and_b32_e32 v12, v0, v20 787; GISEL-NEXT: v_and_b32_e32 v13, v0, v21 788; GISEL-NEXT: v_and_b32_e32 v34, v0, v4 789; GISEL-NEXT: v_and_b32_e32 v35, v0, v5 790; GISEL-NEXT: v_mov_b32_e32 v0, v6 791; GISEL-NEXT: v_mov_b32_e32 v1, v7 792; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v2, v12 793; GISEL-NEXT: v_subb_u32_e32 v13, vcc, v3, v13, vcc 794; GISEL-NEXT: v_subb_u32_e32 v16, vcc, v16, v34, vcc 795; GISEL-NEXT: v_subb_u32_e32 v17, vcc, v17, v35, vcc 796; GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5] 797; GISEL-NEXT: s_cbranch_execnz .LBB0_9 798; GISEL-NEXT: ; %bb.10: ; %Flow 799; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] 800; GISEL-NEXT: .LBB0_11: ; %Flow11 801; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] 802; GISEL-NEXT: v_lshl_b64 v[2:3], v[14:15], 1 803; GISEL-NEXT: v_lshl_b64 v[10:11], v[10:11], 1 804; GISEL-NEXT: v_lshrrev_b32_e32 v4, 31, v15 805; GISEL-NEXT: v_or_b32_e32 v10, v10, v4 806; GISEL-NEXT: v_or_b32_e32 v14, v0, v2 807; GISEL-NEXT: v_or_b32_e32 v15, v1, v3 808; GISEL-NEXT: .LBB0_12: ; %Flow12 809; GISEL-NEXT: s_or_b64 exec, exec, s[12:13] 810; GISEL-NEXT: v_xor_b32_e32 v3, v25, v24 811; GISEL-NEXT: v_xor_b32_e32 v7, v19, v18 812; GISEL-NEXT: v_xor_b32_e32 v0, v22, v3 813; GISEL-NEXT: v_xor_b32_e32 v1, v23, v3 814; GISEL-NEXT: v_xor_b32_e32 v2, v8, v3 815; GISEL-NEXT: v_xor_b32_e32 v6, v9, v3 816; GISEL-NEXT: v_xor_b32_e32 v4, v14, v7 817; GISEL-NEXT: v_xor_b32_e32 v5, v15, v7 818; GISEL-NEXT: v_xor_b32_e32 v8, v10, v7 819; GISEL-NEXT: v_xor_b32_e32 v9, v11, v7 820; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 821; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc 822; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], v4, v7 823; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], v5, v7, s[4:5] 824; GISEL-NEXT: v_subb_u32_e32 v2, vcc, v2, v3, vcc 825; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v6, v3, vcc 826; GISEL-NEXT: v_subb_u32_e64 v6, vcc, v8, v7, s[4:5] 827; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v9, v7, vcc 828; GISEL-NEXT: s_setpc_b64 s[30:31] 829 %shl = sdiv <2 x i128> %lhs, %rhs 830 ret <2 x i128> %shl 831} 832 833define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { 834; SDAG-LABEL: v_udiv_v2i128_vv: 835; SDAG: ; %bb.0: ; %_udiv-special-cases_udiv-special-cases 836; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 837; SDAG-NEXT: v_or_b32_e32 v17, v9, v11 838; SDAG-NEXT: v_or_b32_e32 v16, v8, v10 839; SDAG-NEXT: v_or_b32_e32 v19, v1, v3 840; SDAG-NEXT: v_or_b32_e32 v18, v0, v2 841; SDAG-NEXT: v_ffbh_u32_e32 v20, v10 842; SDAG-NEXT: v_ffbh_u32_e32 v21, v11 843; SDAG-NEXT: v_ffbh_u32_e32 v22, v8 844; SDAG-NEXT: v_ffbh_u32_e32 v23, v9 845; SDAG-NEXT: v_ffbh_u32_e32 v24, v2 846; SDAG-NEXT: v_ffbh_u32_e32 v25, v3 847; SDAG-NEXT: v_ffbh_u32_e32 v26, v0 848; SDAG-NEXT: v_ffbh_u32_e32 v27, v1 849; SDAG-NEXT: v_mov_b32_e32 v28, 0 850; SDAG-NEXT: s_mov_b64 s[8:9], 0x7f 851; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17] 852; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[18:19] 853; SDAG-NEXT: v_add_i32_e64 v16, s[6:7], 32, v20 854; SDAG-NEXT: v_add_i32_e64 v17, s[6:7], 32, v22 855; SDAG-NEXT: v_add_i32_e64 v18, s[6:7], 32, v24 856; SDAG-NEXT: v_add_i32_e64 v19, s[6:7], 32, v26 857; SDAG-NEXT: v_min_u32_e32 v16, v16, v21 858; SDAG-NEXT: v_min_u32_e32 v17, v17, v23 859; SDAG-NEXT: v_min_u32_e32 v18, v18, v25 860; SDAG-NEXT: v_min_u32_e32 v19, v19, v27 861; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[4:5] 862; SDAG-NEXT: v_add_i32_e32 v17, vcc, 64, v17 863; SDAG-NEXT: v_addc_u32_e64 v20, s[4:5], 0, 0, vcc 864; SDAG-NEXT: v_add_i32_e32 v19, vcc, 64, v19 865; SDAG-NEXT: v_addc_u32_e64 v21, s[4:5], 0, 0, vcc 866; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] 867; SDAG-NEXT: v_cndmask_b32_e64 v20, v20, 0, vcc 868; SDAG-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc 869; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] 870; SDAG-NEXT: v_cndmask_b32_e64 v17, v21, 0, vcc 871; SDAG-NEXT: v_cndmask_b32_e32 v18, v19, v18, vcc 872; SDAG-NEXT: v_sub_i32_e32 v22, vcc, v16, v18 873; SDAG-NEXT: v_subb_u32_e32 v23, vcc, v20, v17, vcc 874; SDAG-NEXT: v_xor_b32_e32 v16, 0x7f, v22 875; SDAG-NEXT: v_subbrev_u32_e32 v24, vcc, 0, v28, vcc 876; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[8:9], v[22:23] 877; SDAG-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] 878; SDAG-NEXT: v_subbrev_u32_e32 v25, vcc, 0, v28, vcc 879; SDAG-NEXT: v_or_b32_e32 v16, v16, v24 880; SDAG-NEXT: v_or_b32_e32 v17, v23, v25 881; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[24:25] 882; SDAG-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc 883; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[16:17] 884; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[24:25] 885; SDAG-NEXT: v_cndmask_b32_e64 v16, v19, v18, s[4:5] 886; SDAG-NEXT: v_and_b32_e32 v16, 1, v16 887; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v16 888; SDAG-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] 889; SDAG-NEXT: v_cndmask_b32_e64 v16, v3, 0, s[4:5] 890; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1 891; SDAG-NEXT: v_cndmask_b32_e64 v17, v2, 0, s[4:5] 892; SDAG-NEXT: v_cndmask_b32_e64 v18, v1, 0, s[4:5] 893; SDAG-NEXT: s_and_b64 s[8:9], s[6:7], vcc 894; SDAG-NEXT: v_cndmask_b32_e64 v19, v0, 0, s[4:5] 895; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[8:9] 896; SDAG-NEXT: s_cbranch_execz .LBB1_6 897; SDAG-NEXT: ; %bb.1: ; %udiv-bb15 898; SDAG-NEXT: v_add_i32_e32 v26, vcc, 1, v22 899; SDAG-NEXT: v_sub_i32_e64 v16, s[4:5], 63, v22 900; SDAG-NEXT: v_mov_b32_e32 v20, 0 901; SDAG-NEXT: v_mov_b32_e32 v21, 0 902; SDAG-NEXT: v_addc_u32_e32 v27, vcc, 0, v23, vcc 903; SDAG-NEXT: v_lshl_b64 v[16:17], v[0:1], v16 904; SDAG-NEXT: v_addc_u32_e32 v28, vcc, 0, v24, vcc 905; SDAG-NEXT: v_addc_u32_e32 v29, vcc, 0, v25, vcc 906; SDAG-NEXT: v_or_b32_e32 v18, v26, v28 907; SDAG-NEXT: v_sub_i32_e32 v30, vcc, 0x7f, v22 908; SDAG-NEXT: v_or_b32_e32 v19, v27, v29 909; SDAG-NEXT: v_lshl_b64 v[22:23], v[2:3], v30 910; SDAG-NEXT: v_sub_i32_e32 v31, vcc, 64, v30 911; SDAG-NEXT: v_lshl_b64 v[24:25], v[0:1], v30 912; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[18:19] 913; SDAG-NEXT: v_lshr_b64 v[18:19], v[0:1], v31 914; SDAG-NEXT: v_or_b32_e32 v19, v23, v19 915; SDAG-NEXT: v_or_b32_e32 v18, v22, v18 916; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v30 917; SDAG-NEXT: v_cndmask_b32_e64 v19, v17, v19, s[4:5] 918; SDAG-NEXT: v_cndmask_b32_e64 v18, v16, v18, s[4:5] 919; SDAG-NEXT: v_cndmask_b32_e64 v17, 0, v25, s[4:5] 920; SDAG-NEXT: v_cndmask_b32_e64 v16, 0, v24, s[4:5] 921; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v30 922; SDAG-NEXT: v_cndmask_b32_e64 v19, v19, v3, s[4:5] 923; SDAG-NEXT: v_cndmask_b32_e64 v18, v18, v2, s[4:5] 924; SDAG-NEXT: v_mov_b32_e32 v22, 0 925; SDAG-NEXT: v_mov_b32_e32 v23, 0 926; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc 927; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5] 928; SDAG-NEXT: s_cbranch_execz .LBB1_5 929; SDAG-NEXT: ; %bb.2: ; %udiv-preheader4 930; SDAG-NEXT: v_lshr_b64 v[20:21], v[0:1], v26 931; SDAG-NEXT: v_sub_i32_e32 v22, vcc, 64, v26 932; SDAG-NEXT: v_lshl_b64 v[22:23], v[2:3], v22 933; SDAG-NEXT: v_or_b32_e32 v23, v21, v23 934; SDAG-NEXT: v_or_b32_e32 v22, v20, v22 935; SDAG-NEXT: v_cmp_gt_u32_e32 vcc, 64, v26 936; SDAG-NEXT: v_subrev_i32_e64 v20, s[4:5], 64, v26 937; SDAG-NEXT: v_lshr_b64 v[20:21], v[2:3], v20 938; SDAG-NEXT: v_cndmask_b32_e32 v21, v21, v23, vcc 939; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v26 940; SDAG-NEXT: v_cndmask_b32_e64 v1, v21, v1, s[4:5] 941; SDAG-NEXT: v_cndmask_b32_e32 v20, v20, v22, vcc 942; SDAG-NEXT: v_cndmask_b32_e64 v0, v20, v0, s[4:5] 943; SDAG-NEXT: v_lshr_b64 v[2:3], v[2:3], v26 944; SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc 945; SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc 946; SDAG-NEXT: v_add_i32_e32 v30, vcc, -1, v8 947; SDAG-NEXT: v_addc_u32_e32 v31, vcc, -1, v9, vcc 948; SDAG-NEXT: v_addc_u32_e32 v32, vcc, -1, v10, vcc 949; SDAG-NEXT: v_addc_u32_e32 v33, vcc, -1, v11, vcc 950; SDAG-NEXT: s_mov_b64 s[4:5], 0 951; SDAG-NEXT: v_mov_b32_e32 v24, 0 952; SDAG-NEXT: v_mov_b32_e32 v25, 0 953; SDAG-NEXT: v_mov_b32_e32 v22, 0 954; SDAG-NEXT: v_mov_b32_e32 v23, 0 955; SDAG-NEXT: v_mov_b32_e32 v21, 0 956; SDAG-NEXT: .LBB1_3: ; %udiv-do-while3 957; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 958; SDAG-NEXT: v_lshrrev_b32_e32 v34, 31, v17 959; SDAG-NEXT: v_lshl_b64 v[16:17], v[16:17], 1 960; SDAG-NEXT: v_or_b32_e32 v17, v25, v17 961; SDAG-NEXT: v_or_b32_e32 v16, v24, v16 962; SDAG-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 963; SDAG-NEXT: v_lshrrev_b32_e32 v20, 31, v1 964; SDAG-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 965; SDAG-NEXT: v_or_b32_e32 v2, v2, v20 966; SDAG-NEXT: v_lshrrev_b32_e32 v20, 31, v19 967; SDAG-NEXT: v_or_b32_e32 v0, v0, v20 968; SDAG-NEXT: v_sub_i32_e32 v20, vcc, v30, v0 969; SDAG-NEXT: v_subb_u32_e32 v20, vcc, v31, v1, vcc 970; SDAG-NEXT: v_subb_u32_e32 v20, vcc, v32, v2, vcc 971; SDAG-NEXT: v_subb_u32_e32 v20, vcc, v33, v3, vcc 972; SDAG-NEXT: v_ashrrev_i32_e32 v20, 31, v20 973; SDAG-NEXT: v_and_b32_e32 v24, v20, v8 974; SDAG-NEXT: v_sub_i32_e32 v0, vcc, v0, v24 975; SDAG-NEXT: v_and_b32_e32 v24, v20, v9 976; SDAG-NEXT: v_subb_u32_e32 v1, vcc, v1, v24, vcc 977; SDAG-NEXT: v_and_b32_e32 v24, v20, v10 978; SDAG-NEXT: v_subb_u32_e32 v2, vcc, v2, v24, vcc 979; SDAG-NEXT: v_and_b32_e32 v24, v20, v11 980; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v3, v24, vcc 981; SDAG-NEXT: v_add_i32_e32 v26, vcc, -1, v26 982; SDAG-NEXT: v_addc_u32_e32 v27, vcc, -1, v27, vcc 983; SDAG-NEXT: v_addc_u32_e32 v28, vcc, -1, v28, vcc 984; SDAG-NEXT: v_addc_u32_e32 v29, vcc, -1, v29, vcc 985; SDAG-NEXT: v_or_b32_e32 v24, v26, v28 986; SDAG-NEXT: v_or_b32_e32 v25, v27, v29 987; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[24:25] 988; SDAG-NEXT: v_and_b32_e32 v20, 1, v20 989; SDAG-NEXT: v_lshl_b64 v[18:19], v[18:19], 1 990; SDAG-NEXT: v_or_b32_e32 v18, v18, v34 991; SDAG-NEXT: v_or_b32_e32 v19, v23, v19 992; SDAG-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 993; SDAG-NEXT: v_or_b32_e32 v18, v22, v18 994; SDAG-NEXT: v_mov_b32_e32 v25, v21 995; SDAG-NEXT: v_mov_b32_e32 v24, v20 996; SDAG-NEXT: s_andn2_b64 exec, exec, s[4:5] 997; SDAG-NEXT: s_cbranch_execnz .LBB1_3 998; SDAG-NEXT: ; %bb.4: ; %Flow13 999; SDAG-NEXT: s_or_b64 exec, exec, s[4:5] 1000; SDAG-NEXT: .LBB1_5: ; %Flow14 1001; SDAG-NEXT: s_or_b64 exec, exec, s[8:9] 1002; SDAG-NEXT: v_lshl_b64 v[0:1], v[18:19], 1 1003; SDAG-NEXT: v_lshrrev_b32_e32 v8, 31, v17 1004; SDAG-NEXT: v_lshl_b64 v[2:3], v[16:17], 1 1005; SDAG-NEXT: v_or_b32_e32 v0, v0, v8 1006; SDAG-NEXT: v_or_b32_e32 v16, v23, v1 1007; SDAG-NEXT: v_or_b32_e32 v18, v21, v3 1008; SDAG-NEXT: v_or_b32_e32 v17, v22, v0 1009; SDAG-NEXT: v_or_b32_e32 v19, v20, v2 1010; SDAG-NEXT: .LBB1_6: ; %Flow16 1011; SDAG-NEXT: s_or_b64 exec, exec, s[6:7] 1012; SDAG-NEXT: v_or_b32_e32 v1, v13, v15 1013; SDAG-NEXT: v_or_b32_e32 v0, v12, v14 1014; SDAG-NEXT: v_or_b32_e32 v3, v5, v7 1015; SDAG-NEXT: v_or_b32_e32 v2, v4, v6 1016; SDAG-NEXT: v_ffbh_u32_e32 v8, v14 1017; SDAG-NEXT: v_ffbh_u32_e32 v9, v15 1018; SDAG-NEXT: v_ffbh_u32_e32 v10, v12 1019; SDAG-NEXT: v_ffbh_u32_e32 v11, v13 1020; SDAG-NEXT: v_ffbh_u32_e32 v20, v6 1021; SDAG-NEXT: v_ffbh_u32_e32 v21, v7 1022; SDAG-NEXT: v_ffbh_u32_e32 v22, v4 1023; SDAG-NEXT: v_ffbh_u32_e32 v23, v5 1024; SDAG-NEXT: v_mov_b32_e32 v24, 0 1025; SDAG-NEXT: s_mov_b64 s[8:9], 0x7f 1026; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] 1027; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[2:3] 1028; SDAG-NEXT: v_add_i32_e64 v0, s[6:7], 32, v8 1029; SDAG-NEXT: v_add_i32_e64 v1, s[6:7], 32, v10 1030; SDAG-NEXT: v_add_i32_e64 v2, s[6:7], 32, v20 1031; SDAG-NEXT: v_add_i32_e64 v3, s[6:7], 32, v22 1032; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[4:5] 1033; SDAG-NEXT: v_min_u32_e32 v0, v0, v9 1034; SDAG-NEXT: v_min_u32_e32 v1, v1, v11 1035; SDAG-NEXT: v_min_u32_e32 v2, v2, v21 1036; SDAG-NEXT: v_min_u32_e32 v3, v3, v23 1037; SDAG-NEXT: v_add_i32_e32 v1, vcc, 64, v1 1038; SDAG-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, vcc 1039; SDAG-NEXT: v_add_i32_e32 v3, vcc, 64, v3 1040; SDAG-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc 1041; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15] 1042; SDAG-NEXT: v_cndmask_b32_e64 v8, v8, 0, vcc 1043; SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1044; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] 1045; SDAG-NEXT: v_cndmask_b32_e64 v1, v9, 0, vcc 1046; SDAG-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc 1047; SDAG-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 1048; SDAG-NEXT: v_subb_u32_e32 v1, vcc, v8, v1, vcc 1049; SDAG-NEXT: v_xor_b32_e32 v2, 0x7f, v0 1050; SDAG-NEXT: v_subbrev_u32_e32 v20, vcc, 0, v24, vcc 1051; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[8:9], v[0:1] 1052; SDAG-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] 1053; SDAG-NEXT: v_subbrev_u32_e32 v21, vcc, 0, v24, vcc 1054; SDAG-NEXT: v_or_b32_e32 v2, v2, v20 1055; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[20:21] 1056; SDAG-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1057; SDAG-NEXT: v_or_b32_e32 v3, v1, v21 1058; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[20:21] 1059; SDAG-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc 1060; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] 1061; SDAG-NEXT: v_and_b32_e32 v2, 1, v8 1062; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v2 1063; SDAG-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] 1064; SDAG-NEXT: v_cndmask_b32_e64 v8, v7, 0, s[4:5] 1065; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1 1066; SDAG-NEXT: v_cndmask_b32_e64 v9, v6, 0, s[4:5] 1067; SDAG-NEXT: v_cndmask_b32_e64 v10, v5, 0, s[4:5] 1068; SDAG-NEXT: v_cndmask_b32_e64 v11, v4, 0, s[4:5] 1069; SDAG-NEXT: s_and_b64 s[4:5], s[6:7], vcc 1070; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] 1071; SDAG-NEXT: s_cbranch_execz .LBB1_12 1072; SDAG-NEXT: ; %bb.7: ; %udiv-bb1 1073; SDAG-NEXT: v_add_i32_e32 v22, vcc, 1, v0 1074; SDAG-NEXT: v_sub_i32_e64 v8, s[4:5], 63, v0 1075; SDAG-NEXT: v_mov_b32_e32 v2, 0 1076; SDAG-NEXT: v_mov_b32_e32 v3, 0 1077; SDAG-NEXT: v_addc_u32_e32 v23, vcc, 0, v1, vcc 1078; SDAG-NEXT: v_lshl_b64 v[8:9], v[4:5], v8 1079; SDAG-NEXT: v_addc_u32_e32 v24, vcc, 0, v20, vcc 1080; SDAG-NEXT: v_addc_u32_e32 v25, vcc, 0, v21, vcc 1081; SDAG-NEXT: v_or_b32_e32 v10, v22, v24 1082; SDAG-NEXT: v_sub_i32_e32 v26, vcc, 0x7f, v0 1083; SDAG-NEXT: v_or_b32_e32 v11, v23, v25 1084; SDAG-NEXT: v_lshl_b64 v[0:1], v[6:7], v26 1085; SDAG-NEXT: v_sub_i32_e32 v27, vcc, 64, v26 1086; SDAG-NEXT: v_lshl_b64 v[20:21], v[4:5], v26 1087; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] 1088; SDAG-NEXT: v_lshr_b64 v[10:11], v[4:5], v27 1089; SDAG-NEXT: v_or_b32_e32 v1, v1, v11 1090; SDAG-NEXT: v_or_b32_e32 v0, v0, v10 1091; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v26 1092; SDAG-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[4:5] 1093; SDAG-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[4:5] 1094; SDAG-NEXT: v_cndmask_b32_e64 v9, 0, v21, s[4:5] 1095; SDAG-NEXT: v_cndmask_b32_e64 v8, 0, v20, s[4:5] 1096; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v26 1097; SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5] 1098; SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5] 1099; SDAG-NEXT: v_mov_b32_e32 v20, 0 1100; SDAG-NEXT: v_mov_b32_e32 v21, 0 1101; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc 1102; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5] 1103; SDAG-NEXT: s_cbranch_execz .LBB1_11 1104; SDAG-NEXT: ; %bb.8: ; %udiv-preheader 1105; SDAG-NEXT: v_lshr_b64 v[2:3], v[4:5], v22 1106; SDAG-NEXT: v_sub_i32_e32 v27, vcc, 64, v22 1107; SDAG-NEXT: v_subrev_i32_e32 v28, vcc, 64, v22 1108; SDAG-NEXT: v_lshr_b64 v[29:30], v[6:7], v22 1109; SDAG-NEXT: v_add_i32_e32 v26, vcc, -1, v12 1110; SDAG-NEXT: s_mov_b64 s[10:11], 0 1111; SDAG-NEXT: v_mov_b32_e32 v10, 0 1112; SDAG-NEXT: v_mov_b32_e32 v11, 0 1113; SDAG-NEXT: v_mov_b32_e32 v20, 0 1114; SDAG-NEXT: v_mov_b32_e32 v21, 0 1115; SDAG-NEXT: v_lshl_b64 v[31:32], v[6:7], v27 1116; SDAG-NEXT: v_lshr_b64 v[6:7], v[6:7], v28 1117; SDAG-NEXT: v_addc_u32_e32 v27, vcc, -1, v13, vcc 1118; SDAG-NEXT: v_or_b32_e32 v3, v3, v32 1119; SDAG-NEXT: v_or_b32_e32 v2, v2, v31 1120; SDAG-NEXT: v_addc_u32_e32 v28, vcc, -1, v14, vcc 1121; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v22 1122; SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5] 1123; SDAG-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5] 1124; SDAG-NEXT: v_cndmask_b32_e64 v7, 0, v30, s[4:5] 1125; SDAG-NEXT: v_cndmask_b32_e64 v6, 0, v29, s[4:5] 1126; SDAG-NEXT: v_addc_u32_e32 v29, vcc, -1, v15, vcc 1127; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v22 1128; SDAG-NEXT: v_cndmask_b32_e32 v5, v3, v5, vcc 1129; SDAG-NEXT: v_cndmask_b32_e32 v4, v2, v4, vcc 1130; SDAG-NEXT: v_mov_b32_e32 v3, 0 1131; SDAG-NEXT: .LBB1_9: ; %udiv-do-while 1132; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 1133; SDAG-NEXT: v_lshl_b64 v[6:7], v[6:7], 1 1134; SDAG-NEXT: v_lshrrev_b32_e32 v2, 31, v5 1135; SDAG-NEXT: v_lshl_b64 v[4:5], v[4:5], 1 1136; SDAG-NEXT: v_lshrrev_b32_e32 v30, 31, v1 1137; SDAG-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 1138; SDAG-NEXT: v_lshrrev_b32_e32 v31, 31, v9 1139; SDAG-NEXT: v_lshl_b64 v[8:9], v[8:9], 1 1140; SDAG-NEXT: v_or_b32_e32 v6, v6, v2 1141; SDAG-NEXT: v_or_b32_e32 v2, v4, v30 1142; SDAG-NEXT: v_or_b32_e32 v0, v0, v31 1143; SDAG-NEXT: v_or_b32_e32 v1, v21, v1 1144; SDAG-NEXT: v_sub_i32_e32 v4, vcc, v26, v2 1145; SDAG-NEXT: v_subb_u32_e32 v4, vcc, v27, v5, vcc 1146; SDAG-NEXT: v_subb_u32_e32 v4, vcc, v28, v6, vcc 1147; SDAG-NEXT: v_subb_u32_e32 v4, vcc, v29, v7, vcc 1148; SDAG-NEXT: v_ashrrev_i32_e32 v30, 31, v4 1149; SDAG-NEXT: v_and_b32_e32 v31, v30, v13 1150; SDAG-NEXT: v_and_b32_e32 v4, v30, v12 1151; SDAG-NEXT: v_sub_i32_e32 v4, vcc, v2, v4 1152; SDAG-NEXT: v_subb_u32_e32 v5, vcc, v5, v31, vcc 1153; SDAG-NEXT: v_or_b32_e32 v9, v11, v9 1154; SDAG-NEXT: v_or_b32_e32 v0, v20, v0 1155; SDAG-NEXT: v_and_b32_e32 v2, 1, v30 1156; SDAG-NEXT: v_and_b32_e32 v11, v30, v15 1157; SDAG-NEXT: v_and_b32_e32 v30, v30, v14 1158; SDAG-NEXT: v_subb_u32_e32 v6, vcc, v6, v30, vcc 1159; SDAG-NEXT: v_subb_u32_e32 v7, vcc, v7, v11, vcc 1160; SDAG-NEXT: v_add_i32_e32 v22, vcc, -1, v22 1161; SDAG-NEXT: v_addc_u32_e32 v23, vcc, -1, v23, vcc 1162; SDAG-NEXT: v_addc_u32_e32 v24, vcc, -1, v24, vcc 1163; SDAG-NEXT: v_addc_u32_e32 v25, vcc, -1, v25, vcc 1164; SDAG-NEXT: v_or_b32_e32 v31, v23, v25 1165; SDAG-NEXT: v_or_b32_e32 v30, v22, v24 1166; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[30:31] 1167; SDAG-NEXT: s_or_b64 s[10:11], vcc, s[10:11] 1168; SDAG-NEXT: v_or_b32_e32 v8, v10, v8 1169; SDAG-NEXT: v_mov_b32_e32 v11, v3 1170; SDAG-NEXT: v_mov_b32_e32 v10, v2 1171; SDAG-NEXT: s_andn2_b64 exec, exec, s[10:11] 1172; SDAG-NEXT: s_cbranch_execnz .LBB1_9 1173; SDAG-NEXT: ; %bb.10: ; %Flow 1174; SDAG-NEXT: s_or_b64 exec, exec, s[10:11] 1175; SDAG-NEXT: .LBB1_11: ; %Flow11 1176; SDAG-NEXT: s_or_b64 exec, exec, s[8:9] 1177; SDAG-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 1178; SDAG-NEXT: v_lshrrev_b32_e32 v6, 31, v9 1179; SDAG-NEXT: v_lshl_b64 v[4:5], v[8:9], 1 1180; SDAG-NEXT: v_or_b32_e32 v0, v0, v6 1181; SDAG-NEXT: v_or_b32_e32 v8, v21, v1 1182; SDAG-NEXT: v_or_b32_e32 v10, v3, v5 1183; SDAG-NEXT: v_or_b32_e32 v9, v20, v0 1184; SDAG-NEXT: v_or_b32_e32 v11, v2, v4 1185; SDAG-NEXT: .LBB1_12: ; %Flow12 1186; SDAG-NEXT: s_or_b64 exec, exec, s[6:7] 1187; SDAG-NEXT: v_mov_b32_e32 v0, v19 1188; SDAG-NEXT: v_mov_b32_e32 v1, v18 1189; SDAG-NEXT: v_mov_b32_e32 v2, v17 1190; SDAG-NEXT: v_mov_b32_e32 v3, v16 1191; SDAG-NEXT: v_mov_b32_e32 v4, v11 1192; SDAG-NEXT: v_mov_b32_e32 v5, v10 1193; SDAG-NEXT: v_mov_b32_e32 v6, v9 1194; SDAG-NEXT: v_mov_b32_e32 v7, v8 1195; SDAG-NEXT: s_setpc_b64 s[30:31] 1196; 1197; GISEL-LABEL: v_udiv_v2i128_vv: 1198; GISEL: ; %bb.0: ; %_udiv-special-cases_udiv-special-cases 1199; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1200; GISEL-NEXT: v_mov_b32_e32 v16, v2 1201; GISEL-NEXT: v_mov_b32_e32 v17, v3 1202; GISEL-NEXT: v_or_b32_e32 v2, v8, v10 1203; GISEL-NEXT: v_or_b32_e32 v3, v9, v11 1204; GISEL-NEXT: v_or_b32_e32 v18, v0, v16 1205; GISEL-NEXT: v_or_b32_e32 v19, v1, v17 1206; GISEL-NEXT: v_ffbh_u32_e32 v20, v9 1207; GISEL-NEXT: v_ffbh_u32_e32 v21, v8 1208; GISEL-NEXT: v_ffbh_u32_e32 v22, v11 1209; GISEL-NEXT: v_ffbh_u32_e32 v23, v10 1210; GISEL-NEXT: v_ffbh_u32_e32 v26, v1 1211; GISEL-NEXT: v_ffbh_u32_e32 v27, v0 1212; GISEL-NEXT: v_ffbh_u32_e32 v28, v16 1213; GISEL-NEXT: v_ffbh_u32_e32 v29, v17 1214; GISEL-NEXT: v_mov_b32_e32 v24, 0x7f 1215; GISEL-NEXT: v_mov_b32_e32 v25, 0 1216; GISEL-NEXT: s_mov_b64 s[8:9], 0 1217; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3] 1218; GISEL-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[18:19] 1219; GISEL-NEXT: v_add_i32_e64 v2, s[6:7], 32, v21 1220; GISEL-NEXT: v_add_i32_e64 v3, s[6:7], 32, v23 1221; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], 32, v27 1222; GISEL-NEXT: v_add_i32_e64 v19, s[6:7], 32, v28 1223; GISEL-NEXT: v_min_u32_e32 v2, v20, v2 1224; GISEL-NEXT: v_min_u32_e32 v3, v22, v3 1225; GISEL-NEXT: v_min_u32_e32 v18, v26, v18 1226; GISEL-NEXT: v_min_u32_e32 v19, v29, v19 1227; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1228; GISEL-NEXT: v_cndmask_b32_e64 v26, 0, 1, s[4:5] 1229; GISEL-NEXT: v_add_i32_e32 v2, vcc, 64, v2 1230; GISEL-NEXT: v_add_i32_e32 v18, vcc, 64, v18 1231; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] 1232; GISEL-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc 1233; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17] 1234; GISEL-NEXT: v_cndmask_b32_e32 v3, v19, v18, vcc 1235; GISEL-NEXT: v_sub_i32_e32 v22, vcc, v2, v3 1236; GISEL-NEXT: v_subb_u32_e64 v23, s[4:5], 0, 0, vcc 1237; GISEL-NEXT: v_subb_u32_e64 v20, s[4:5], 0, 0, s[4:5] 1238; GISEL-NEXT: v_subb_u32_e64 v21, s[4:5], 0, 0, s[4:5] 1239; GISEL-NEXT: v_xor_b32_e32 v2, 0x7f, v22 1240; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[22:23], v[24:25] 1241; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 1242; GISEL-NEXT: v_or_b32_e32 v2, v2, v20 1243; GISEL-NEXT: v_or_b32_e32 v3, v23, v21 1244; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[20:21] 1245; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc 1246; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[20:21] 1247; GISEL-NEXT: v_cndmask_b32_e32 v18, v19, v18, vcc 1248; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3] 1249; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1250; GISEL-NEXT: v_or_b32_e32 v3, v26, v18 1251; GISEL-NEXT: v_or_b32_e32 v2, v3, v2 1252; GISEL-NEXT: v_and_b32_e32 v3, 1, v3 1253; GISEL-NEXT: v_and_b32_e32 v2, 1, v2 1254; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 1255; GISEL-NEXT: v_cndmask_b32_e64 v18, v0, 0, vcc 1256; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v2 1257; GISEL-NEXT: v_cndmask_b32_e64 v2, v16, 0, vcc 1258; GISEL-NEXT: v_cndmask_b32_e64 v3, v17, 0, vcc 1259; GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], -1 1260; GISEL-NEXT: v_cndmask_b32_e64 v19, v1, 0, vcc 1261; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5] 1262; GISEL-NEXT: s_cbranch_execz .LBB1_6 1263; GISEL-NEXT: ; %bb.1: ; %udiv-bb15 1264; GISEL-NEXT: v_add_i32_e32 v26, vcc, 1, v22 1265; GISEL-NEXT: v_addc_u32_e64 v27, s[4:5], 0, v23, vcc 1266; GISEL-NEXT: v_sub_i32_e32 v30, vcc, 0x7f, v22 1267; GISEL-NEXT: v_not_b32_e32 v2, 63 1268; GISEL-NEXT: v_addc_u32_e64 v28, vcc, 0, v20, s[4:5] 1269; GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v21, vcc 1270; GISEL-NEXT: v_add_i32_e64 v22, s[4:5], v30, v2 1271; GISEL-NEXT: v_sub_i32_e64 v20, s[4:5], 64, v30 1272; GISEL-NEXT: v_lshl_b64 v[2:3], v[0:1], v30 1273; GISEL-NEXT: v_lshl_b64 v[18:19], v[16:17], v30 1274; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 1275; GISEL-NEXT: v_lshr_b64 v[20:21], v[0:1], v20 1276; GISEL-NEXT: v_lshl_b64 v[24:25], v[0:1], v22 1277; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v30 1278; GISEL-NEXT: v_cndmask_b32_e32 v22, 0, v2, vcc 1279; GISEL-NEXT: v_cndmask_b32_e32 v23, 0, v3, vcc 1280; GISEL-NEXT: v_or_b32_e32 v2, v20, v18 1281; GISEL-NEXT: v_or_b32_e32 v3, v21, v19 1282; GISEL-NEXT: v_cndmask_b32_e32 v2, v24, v2, vcc 1283; GISEL-NEXT: v_cndmask_b32_e32 v3, v25, v3, vcc 1284; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v30 1285; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v16, vcc 1286; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v17, vcc 1287; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9] 1288; GISEL-NEXT: v_mov_b32_e32 v21, s11 1289; GISEL-NEXT: v_mov_b32_e32 v20, s10 1290; GISEL-NEXT: v_mov_b32_e32 v19, s9 1291; GISEL-NEXT: v_mov_b32_e32 v18, s8 1292; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] 1293; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7] 1294; GISEL-NEXT: s_cbranch_execz .LBB1_5 1295; GISEL-NEXT: ; %bb.2: ; %udiv-preheader4 1296; GISEL-NEXT: v_add_i32_e32 v32, vcc, 0xffffffc0, v26 1297; GISEL-NEXT: v_sub_i32_e32 v24, vcc, 64, v26 1298; GISEL-NEXT: v_lshr_b64 v[18:19], v[16:17], v26 1299; GISEL-NEXT: v_lshr_b64 v[20:21], v[0:1], v26 1300; GISEL-NEXT: s_mov_b64 s[4:5], 0 1301; GISEL-NEXT: v_add_i32_e32 v30, vcc, -1, v8 1302; GISEL-NEXT: v_addc_u32_e32 v31, vcc, -1, v9, vcc 1303; GISEL-NEXT: v_lshl_b64 v[24:25], v[16:17], v24 1304; GISEL-NEXT: v_lshr_b64 v[16:17], v[16:17], v32 1305; GISEL-NEXT: v_addc_u32_e32 v32, vcc, -1, v10, vcc 1306; GISEL-NEXT: v_addc_u32_e32 v33, vcc, -1, v11, vcc 1307; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] 1308; GISEL-NEXT: v_or_b32_e32 v20, v20, v24 1309; GISEL-NEXT: v_or_b32_e32 v21, v21, v25 1310; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v26 1311; GISEL-NEXT: v_cndmask_b32_e32 v20, v16, v20, vcc 1312; GISEL-NEXT: v_cndmask_b32_e32 v21, v17, v21, vcc 1313; GISEL-NEXT: v_cndmask_b32_e32 v16, 0, v18, vcc 1314; GISEL-NEXT: v_cndmask_b32_e32 v17, 0, v19, vcc 1315; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v26 1316; GISEL-NEXT: v_cndmask_b32_e32 v24, v20, v0, vcc 1317; GISEL-NEXT: v_cndmask_b32_e32 v25, v21, v1, vcc 1318; GISEL-NEXT: v_mov_b32_e32 v1, 0 1319; GISEL-NEXT: v_mov_b32_e32 v21, s7 1320; GISEL-NEXT: v_mov_b32_e32 v20, s6 1321; GISEL-NEXT: v_mov_b32_e32 v19, s5 1322; GISEL-NEXT: v_mov_b32_e32 v18, s4 1323; GISEL-NEXT: .LBB1_3: ; %udiv-do-while3 1324; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 1325; GISEL-NEXT: v_lshrrev_b32_e32 v34, 31, v23 1326; GISEL-NEXT: v_lshl_b64 v[20:21], v[22:23], 1 1327; GISEL-NEXT: v_lshrrev_b32_e32 v0, 31, v25 1328; GISEL-NEXT: v_lshl_b64 v[24:25], v[24:25], 1 1329; GISEL-NEXT: v_lshl_b64 v[16:17], v[16:17], 1 1330; GISEL-NEXT: v_lshrrev_b32_e32 v35, 31, v3 1331; GISEL-NEXT: v_add_i32_e32 v26, vcc, -1, v26 1332; GISEL-NEXT: v_addc_u32_e32 v27, vcc, -1, v27, vcc 1333; GISEL-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 1334; GISEL-NEXT: v_or_b32_e32 v22, v18, v20 1335; GISEL-NEXT: v_or_b32_e32 v23, v19, v21 1336; GISEL-NEXT: v_or_b32_e32 v16, v16, v0 1337; GISEL-NEXT: v_or_b32_e32 v20, v24, v35 1338; GISEL-NEXT: v_addc_u32_e32 v28, vcc, -1, v28, vcc 1339; GISEL-NEXT: v_addc_u32_e32 v29, vcc, -1, v29, vcc 1340; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v30, v20 1341; GISEL-NEXT: v_subb_u32_e32 v0, vcc, v31, v25, vcc 1342; GISEL-NEXT: v_or_b32_e32 v18, v26, v28 1343; GISEL-NEXT: v_or_b32_e32 v19, v27, v29 1344; GISEL-NEXT: v_subb_u32_e32 v0, vcc, v32, v16, vcc 1345; GISEL-NEXT: v_subb_u32_e32 v0, vcc, v33, v17, vcc 1346; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[18:19] 1347; GISEL-NEXT: v_ashrrev_i32_e32 v0, 31, v0 1348; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1349; GISEL-NEXT: v_and_b32_e32 v18, v0, v8 1350; GISEL-NEXT: v_and_b32_e32 v19, v0, v9 1351; GISEL-NEXT: v_and_b32_e32 v21, v0, v10 1352; GISEL-NEXT: v_and_b32_e32 v35, v0, v11 1353; GISEL-NEXT: v_and_b32_e32 v0, 1, v0 1354; GISEL-NEXT: v_sub_i32_e32 v24, vcc, v20, v18 1355; GISEL-NEXT: v_subb_u32_e32 v25, vcc, v25, v19, vcc 1356; GISEL-NEXT: v_subb_u32_e32 v16, vcc, v16, v21, vcc 1357; GISEL-NEXT: v_subb_u32_e32 v17, vcc, v17, v35, vcc 1358; GISEL-NEXT: v_or_b32_e32 v2, v2, v34 1359; GISEL-NEXT: v_mov_b32_e32 v19, v1 1360; GISEL-NEXT: v_mov_b32_e32 v18, v0 1361; GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5] 1362; GISEL-NEXT: s_cbranch_execnz .LBB1_3 1363; GISEL-NEXT: ; %bb.4: ; %Flow13 1364; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] 1365; GISEL-NEXT: .LBB1_5: ; %Flow14 1366; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] 1367; GISEL-NEXT: v_lshl_b64 v[0:1], v[22:23], 1 1368; GISEL-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 1369; GISEL-NEXT: v_lshrrev_b32_e32 v8, 31, v23 1370; GISEL-NEXT: v_or_b32_e32 v2, v2, v8 1371; GISEL-NEXT: v_or_b32_e32 v18, v18, v0 1372; GISEL-NEXT: v_or_b32_e32 v19, v19, v1 1373; GISEL-NEXT: .LBB1_6: ; %Flow16 1374; GISEL-NEXT: s_or_b64 exec, exec, s[12:13] 1375; GISEL-NEXT: s_mov_b64 s[8:9], 0 1376; GISEL-NEXT: v_or_b32_e32 v0, v12, v14 1377; GISEL-NEXT: v_or_b32_e32 v1, v13, v15 1378; GISEL-NEXT: v_or_b32_e32 v8, v4, v6 1379; GISEL-NEXT: v_or_b32_e32 v9, v5, v7 1380; GISEL-NEXT: v_ffbh_u32_e32 v16, v13 1381; GISEL-NEXT: v_ffbh_u32_e32 v17, v12 1382; GISEL-NEXT: v_ffbh_u32_e32 v20, v15 1383; GISEL-NEXT: v_ffbh_u32_e32 v21, v14 1384; GISEL-NEXT: v_ffbh_u32_e32 v22, v5 1385; GISEL-NEXT: v_ffbh_u32_e32 v23, v4 1386; GISEL-NEXT: v_ffbh_u32_e32 v24, v7 1387; GISEL-NEXT: v_ffbh_u32_e32 v25, v6 1388; GISEL-NEXT: v_mov_b32_e32 v10, 0x7f 1389; GISEL-NEXT: v_mov_b32_e32 v11, 0 1390; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] 1391; GISEL-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[8:9] 1392; GISEL-NEXT: v_add_i32_e64 v0, s[6:7], 32, v17 1393; GISEL-NEXT: v_add_i32_e64 v1, s[6:7], 32, v21 1394; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], 32, v23 1395; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], 32, v25 1396; GISEL-NEXT: v_min_u32_e32 v0, v16, v0 1397; GISEL-NEXT: v_min_u32_e32 v1, v20, v1 1398; GISEL-NEXT: v_min_u32_e32 v8, v22, v8 1399; GISEL-NEXT: v_min_u32_e32 v9, v24, v9 1400; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1401; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[4:5] 1402; GISEL-NEXT: v_add_i32_e32 v0, vcc, 64, v0 1403; GISEL-NEXT: v_add_i32_e32 v8, vcc, 64, v8 1404; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] 1405; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1406; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] 1407; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc 1408; GISEL-NEXT: v_sub_i32_e32 v16, vcc, v0, v1 1409; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], 0, 0, vcc 1410; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5] 1411; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5] 1412; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[16:17], v[10:11] 1413; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1414; GISEL-NEXT: v_xor_b32_e32 v8, 0x7f, v16 1415; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1] 1416; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1417; GISEL-NEXT: v_or_b32_e32 v8, v8, v0 1418; GISEL-NEXT: v_or_b32_e32 v9, v17, v1 1419; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] 1420; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc 1421; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] 1422; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1423; GISEL-NEXT: v_or_b32_e32 v9, v20, v10 1424; GISEL-NEXT: v_and_b32_e32 v10, 1, v9 1425; GISEL-NEXT: v_or_b32_e32 v8, v9, v8 1426; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 1427; GISEL-NEXT: v_cndmask_b32_e64 v10, v4, 0, vcc 1428; GISEL-NEXT: v_and_b32_e32 v20, 1, v8 1429; GISEL-NEXT: v_cndmask_b32_e64 v11, v5, 0, vcc 1430; GISEL-NEXT: v_cndmask_b32_e64 v8, v6, 0, vcc 1431; GISEL-NEXT: v_cndmask_b32_e64 v9, v7, 0, vcc 1432; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v20 1433; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 1434; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5] 1435; GISEL-NEXT: s_cbranch_execz .LBB1_12 1436; GISEL-NEXT: ; %bb.7: ; %udiv-bb1 1437; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v16 1438; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, v17, vcc 1439; GISEL-NEXT: v_sub_i32_e32 v26, vcc, 0x7f, v16 1440; GISEL-NEXT: v_not_b32_e32 v9, 63 1441; GISEL-NEXT: v_addc_u32_e64 v24, vcc, 0, v0, s[4:5] 1442; GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v1, vcc 1443; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v26, v9 1444; GISEL-NEXT: v_sub_i32_e64 v10, s[4:5], 64, v26 1445; GISEL-NEXT: v_lshl_b64 v[0:1], v[4:5], v26 1446; GISEL-NEXT: v_lshl_b64 v[16:17], v[6:7], v26 1447; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 1448; GISEL-NEXT: v_lshr_b64 v[20:21], v[4:5], v10 1449; GISEL-NEXT: v_lshl_b64 v[22:23], v[4:5], v9 1450; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v26 1451; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v0, vcc 1452; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v1, vcc 1453; GISEL-NEXT: v_or_b32_e32 v0, v20, v16 1454; GISEL-NEXT: v_or_b32_e32 v1, v21, v17 1455; GISEL-NEXT: v_cndmask_b32_e32 v0, v22, v0, vcc 1456; GISEL-NEXT: v_cndmask_b32_e32 v1, v23, v1, vcc 1457; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v26 1458; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 1459; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 1460; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9] 1461; GISEL-NEXT: v_mov_b32_e32 v23, s11 1462; GISEL-NEXT: v_mov_b32_e32 v22, s10 1463; GISEL-NEXT: v_mov_b32_e32 v21, s9 1464; GISEL-NEXT: v_mov_b32_e32 v20, s8 1465; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] 1466; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7] 1467; GISEL-NEXT: s_cbranch_execz .LBB1_11 1468; GISEL-NEXT: ; %bb.8: ; %udiv-preheader 1469; GISEL-NEXT: v_add_i32_e32 v28, vcc, 0xffffffc0, v8 1470; GISEL-NEXT: v_sub_i32_e32 v22, vcc, 64, v8 1471; GISEL-NEXT: v_lshr_b64 v[16:17], v[6:7], v8 1472; GISEL-NEXT: v_lshr_b64 v[20:21], v[4:5], v8 1473; GISEL-NEXT: s_mov_b64 s[4:5], 0 1474; GISEL-NEXT: v_add_i32_e32 v26, vcc, -1, v12 1475; GISEL-NEXT: v_addc_u32_e32 v27, vcc, -1, v13, vcc 1476; GISEL-NEXT: v_lshl_b64 v[22:23], v[6:7], v22 1477; GISEL-NEXT: v_lshr_b64 v[6:7], v[6:7], v28 1478; GISEL-NEXT: v_addc_u32_e32 v28, vcc, -1, v14, vcc 1479; GISEL-NEXT: v_addc_u32_e32 v29, vcc, -1, v15, vcc 1480; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] 1481; GISEL-NEXT: v_or_b32_e32 v20, v20, v22 1482; GISEL-NEXT: v_or_b32_e32 v21, v21, v23 1483; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v8 1484; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v20, vcc 1485; GISEL-NEXT: v_cndmask_b32_e32 v7, v7, v21, vcc 1486; GISEL-NEXT: v_cndmask_b32_e32 v16, 0, v16, vcc 1487; GISEL-NEXT: v_cndmask_b32_e32 v17, 0, v17, vcc 1488; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8 1489; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v4, vcc 1490; GISEL-NEXT: v_cndmask_b32_e32 v7, v7, v5, vcc 1491; GISEL-NEXT: v_mov_b32_e32 v5, 0 1492; GISEL-NEXT: v_mov_b32_e32 v23, s7 1493; GISEL-NEXT: v_mov_b32_e32 v22, s6 1494; GISEL-NEXT: v_mov_b32_e32 v21, s5 1495; GISEL-NEXT: v_mov_b32_e32 v20, s4 1496; GISEL-NEXT: .LBB1_9: ; %udiv-do-while 1497; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 1498; GISEL-NEXT: v_lshl_b64 v[22:23], v[6:7], 1 1499; GISEL-NEXT: v_lshl_b64 v[16:17], v[16:17], 1 1500; GISEL-NEXT: v_lshrrev_b32_e32 v4, 31, v7 1501; GISEL-NEXT: v_lshrrev_b32_e32 v30, 31, v1 1502; GISEL-NEXT: v_lshl_b64 v[6:7], v[9:10], 1 1503; GISEL-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 1504; GISEL-NEXT: v_lshrrev_b32_e32 v9, 31, v10 1505; GISEL-NEXT: v_add_i32_e32 v8, vcc, -1, v8 1506; GISEL-NEXT: v_addc_u32_e32 v11, vcc, -1, v11, vcc 1507; GISEL-NEXT: v_or_b32_e32 v16, v16, v4 1508; GISEL-NEXT: v_or_b32_e32 v22, v22, v30 1509; GISEL-NEXT: v_or_b32_e32 v0, v0, v9 1510; GISEL-NEXT: v_or_b32_e32 v9, v20, v6 1511; GISEL-NEXT: v_or_b32_e32 v10, v21, v7 1512; GISEL-NEXT: v_addc_u32_e32 v24, vcc, -1, v24, vcc 1513; GISEL-NEXT: v_addc_u32_e32 v25, vcc, -1, v25, vcc 1514; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v26, v22 1515; GISEL-NEXT: v_subb_u32_e32 v4, vcc, v27, v23, vcc 1516; GISEL-NEXT: v_or_b32_e32 v6, v8, v24 1517; GISEL-NEXT: v_or_b32_e32 v7, v11, v25 1518; GISEL-NEXT: v_subb_u32_e32 v4, vcc, v28, v16, vcc 1519; GISEL-NEXT: v_subb_u32_e32 v4, vcc, v29, v17, vcc 1520; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] 1521; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v4 1522; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1523; GISEL-NEXT: v_and_b32_e32 v4, 1, v6 1524; GISEL-NEXT: v_and_b32_e32 v7, v6, v12 1525; GISEL-NEXT: v_and_b32_e32 v30, v6, v13 1526; GISEL-NEXT: v_and_b32_e32 v31, v6, v14 1527; GISEL-NEXT: v_and_b32_e32 v32, v6, v15 1528; GISEL-NEXT: v_mov_b32_e32 v21, v5 1529; GISEL-NEXT: v_mov_b32_e32 v20, v4 1530; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v22, v7 1531; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v23, v30, vcc 1532; GISEL-NEXT: v_subb_u32_e32 v16, vcc, v16, v31, vcc 1533; GISEL-NEXT: v_subb_u32_e32 v17, vcc, v17, v32, vcc 1534; GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5] 1535; GISEL-NEXT: s_cbranch_execnz .LBB1_9 1536; GISEL-NEXT: ; %bb.10: ; %Flow 1537; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] 1538; GISEL-NEXT: .LBB1_11: ; %Flow11 1539; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] 1540; GISEL-NEXT: v_lshl_b64 v[4:5], v[9:10], 1 1541; GISEL-NEXT: v_lshl_b64 v[8:9], v[0:1], 1 1542; GISEL-NEXT: v_lshrrev_b32_e32 v0, 31, v10 1543; GISEL-NEXT: v_or_b32_e32 v8, v8, v0 1544; GISEL-NEXT: v_or_b32_e32 v10, v20, v4 1545; GISEL-NEXT: v_or_b32_e32 v11, v21, v5 1546; GISEL-NEXT: .LBB1_12: ; %Flow12 1547; GISEL-NEXT: s_or_b64 exec, exec, s[12:13] 1548; GISEL-NEXT: v_mov_b32_e32 v0, v18 1549; GISEL-NEXT: v_mov_b32_e32 v1, v19 1550; GISEL-NEXT: v_mov_b32_e32 v4, v10 1551; GISEL-NEXT: v_mov_b32_e32 v5, v11 1552; GISEL-NEXT: v_mov_b32_e32 v6, v8 1553; GISEL-NEXT: v_mov_b32_e32 v7, v9 1554; GISEL-NEXT: s_setpc_b64 s[30:31] 1555 %shl = udiv <2 x i128> %lhs, %rhs 1556 ret <2 x i128> %shl 1557} 1558 1559define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { 1560; SDAG-LABEL: v_srem_v2i128_vv: 1561; SDAG: ; %bb.0: ; %_udiv-special-cases_udiv-special-cases 1562; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1563; SDAG-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill 1564; SDAG-NEXT: v_sub_i32_e32 v16, vcc, 0, v0 1565; SDAG-NEXT: v_mov_b32_e32 v19, 0 1566; SDAG-NEXT: v_ashrrev_i32_e32 v28, 31, v3 1567; SDAG-NEXT: s_mov_b64 s[10:11], 0x7f 1568; SDAG-NEXT: v_subb_u32_e32 v17, vcc, 0, v1, vcc 1569; SDAG-NEXT: v_mov_b32_e32 v29, v28 1570; SDAG-NEXT: v_subb_u32_e32 v18, vcc, 0, v2, vcc 1571; SDAG-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[2:3] 1572; SDAG-NEXT: v_cndmask_b32_e64 v17, v1, v17, s[4:5] 1573; SDAG-NEXT: v_cndmask_b32_e64 v16, v0, v16, s[4:5] 1574; SDAG-NEXT: v_subb_u32_e32 v1, vcc, 0, v3, vcc 1575; SDAG-NEXT: v_cndmask_b32_e64 v0, v2, v18, s[4:5] 1576; SDAG-NEXT: v_ffbh_u32_e32 v18, v16 1577; SDAG-NEXT: v_ffbh_u32_e32 v20, v17 1578; SDAG-NEXT: v_sub_i32_e32 v21, vcc, 0, v8 1579; SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5] 1580; SDAG-NEXT: v_or_b32_e32 v2, v16, v0 1581; SDAG-NEXT: v_add_i32_e64 v18, s[4:5], 32, v18 1582; SDAG-NEXT: v_ffbh_u32_e32 v22, v0 1583; SDAG-NEXT: v_subb_u32_e32 v23, vcc, 0, v9, vcc 1584; SDAG-NEXT: v_or_b32_e32 v3, v17, v1 1585; SDAG-NEXT: v_min_u32_e32 v18, v18, v20 1586; SDAG-NEXT: v_add_i32_e64 v20, s[4:5], 32, v22 1587; SDAG-NEXT: v_ffbh_u32_e32 v22, v1 1588; SDAG-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[10:11] 1589; SDAG-NEXT: v_cndmask_b32_e64 v30, v9, v23, s[4:5] 1590; SDAG-NEXT: v_subb_u32_e32 v9, vcc, 0, v10, vcc 1591; SDAG-NEXT: v_cndmask_b32_e64 v31, v8, v21, s[4:5] 1592; SDAG-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[2:3] 1593; SDAG-NEXT: v_min_u32_e32 v3, v20, v22 1594; SDAG-NEXT: v_add_i32_e64 v8, s[8:9], 64, v18 1595; SDAG-NEXT: v_addc_u32_e64 v18, s[8:9], 0, 0, s[8:9] 1596; SDAG-NEXT: v_subb_u32_e32 v20, vcc, 0, v11, vcc 1597; SDAG-NEXT: v_cndmask_b32_e64 v2, v10, v9, s[4:5] 1598; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] 1599; SDAG-NEXT: v_cndmask_b32_e64 v18, v18, 0, vcc 1600; SDAG-NEXT: v_cndmask_b32_e32 v10, v8, v3, vcc 1601; SDAG-NEXT: v_ffbh_u32_e32 v9, v31 1602; SDAG-NEXT: v_ffbh_u32_e32 v21, v30 1603; SDAG-NEXT: v_cndmask_b32_e64 v3, v11, v20, s[4:5] 1604; SDAG-NEXT: v_or_b32_e32 v8, v31, v2 1605; SDAG-NEXT: v_add_i32_e32 v11, vcc, 32, v9 1606; SDAG-NEXT: v_ffbh_u32_e32 v20, v2 1607; SDAG-NEXT: v_or_b32_e32 v9, v30, v3 1608; SDAG-NEXT: v_min_u32_e32 v11, v11, v21 1609; SDAG-NEXT: v_add_i32_e32 v20, vcc, 32, v20 1610; SDAG-NEXT: v_ffbh_u32_e32 v21, v3 1611; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] 1612; SDAG-NEXT: v_min_u32_e32 v8, v20, v21 1613; SDAG-NEXT: v_add_i32_e64 v9, s[4:5], 64, v11 1614; SDAG-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5] 1615; SDAG-NEXT: v_cmp_ne_u64_e64 s[4:5], 0, v[2:3] 1616; SDAG-NEXT: v_cndmask_b32_e64 v11, v11, 0, s[4:5] 1617; SDAG-NEXT: v_cndmask_b32_e64 v8, v9, v8, s[4:5] 1618; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[6:7] 1619; SDAG-NEXT: v_sub_i32_e32 v10, vcc, v8, v10 1620; SDAG-NEXT: v_subb_u32_e32 v11, vcc, v11, v18, vcc 1621; SDAG-NEXT: v_xor_b32_e32 v8, 0x7f, v10 1622; SDAG-NEXT: v_subbrev_u32_e32 v18, vcc, 0, v19, vcc 1623; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[10:11], v[10:11] 1624; SDAG-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[4:5] 1625; SDAG-NEXT: v_subbrev_u32_e32 v19, vcc, 0, v19, vcc 1626; SDAG-NEXT: v_or_b32_e32 v8, v8, v18 1627; SDAG-NEXT: v_or_b32_e32 v9, v11, v19 1628; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[18:19] 1629; SDAG-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc 1630; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] 1631; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[18:19] 1632; SDAG-NEXT: v_cndmask_b32_e64 v8, v21, v20, s[4:5] 1633; SDAG-NEXT: v_and_b32_e32 v8, 1, v8 1634; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v8 1635; SDAG-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] 1636; SDAG-NEXT: v_cndmask_b32_e64 v35, v1, 0, s[4:5] 1637; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1 1638; SDAG-NEXT: v_cndmask_b32_e64 v32, v0, 0, s[4:5] 1639; SDAG-NEXT: v_cndmask_b32_e64 v27, v17, 0, s[4:5] 1640; SDAG-NEXT: s_and_b64 s[8:9], s[6:7], vcc 1641; SDAG-NEXT: v_cndmask_b32_e64 v33, v16, 0, s[4:5] 1642; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[8:9] 1643; SDAG-NEXT: s_cbranch_execz .LBB2_6 1644; SDAG-NEXT: ; %bb.1: ; %udiv-bb15 1645; SDAG-NEXT: v_add_i32_e32 v32, vcc, 1, v10 1646; SDAG-NEXT: v_sub_i32_e64 v20, s[4:5], 63, v10 1647; SDAG-NEXT: v_mov_b32_e32 v8, 0 1648; SDAG-NEXT: v_mov_b32_e32 v9, 0 1649; SDAG-NEXT: v_addc_u32_e32 v33, vcc, 0, v11, vcc 1650; SDAG-NEXT: v_lshl_b64 v[20:21], v[16:17], v20 1651; SDAG-NEXT: v_addc_u32_e32 v34, vcc, 0, v18, vcc 1652; SDAG-NEXT: v_addc_u32_e32 v35, vcc, 0, v19, vcc 1653; SDAG-NEXT: v_or_b32_e32 v18, v32, v34 1654; SDAG-NEXT: v_sub_i32_e32 v24, vcc, 0x7f, v10 1655; SDAG-NEXT: v_or_b32_e32 v19, v33, v35 1656; SDAG-NEXT: v_lshl_b64 v[10:11], v[0:1], v24 1657; SDAG-NEXT: v_sub_i32_e32 v25, vcc, 64, v24 1658; SDAG-NEXT: v_lshl_b64 v[22:23], v[16:17], v24 1659; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[18:19] 1660; SDAG-NEXT: v_lshr_b64 v[18:19], v[16:17], v25 1661; SDAG-NEXT: v_or_b32_e32 v11, v11, v19 1662; SDAG-NEXT: v_or_b32_e32 v10, v10, v18 1663; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v24 1664; SDAG-NEXT: v_cndmask_b32_e64 v11, v21, v11, s[4:5] 1665; SDAG-NEXT: v_cndmask_b32_e64 v10, v20, v10, s[4:5] 1666; SDAG-NEXT: v_cndmask_b32_e64 v21, 0, v23, s[4:5] 1667; SDAG-NEXT: v_cndmask_b32_e64 v20, 0, v22, s[4:5] 1668; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v24 1669; SDAG-NEXT: v_cndmask_b32_e64 v11, v11, v1, s[4:5] 1670; SDAG-NEXT: v_cndmask_b32_e64 v10, v10, v0, s[4:5] 1671; SDAG-NEXT: v_mov_b32_e32 v18, 0 1672; SDAG-NEXT: v_mov_b32_e32 v19, 0 1673; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc 1674; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5] 1675; SDAG-NEXT: s_cbranch_execz .LBB2_5 1676; SDAG-NEXT: ; %bb.2: ; %udiv-preheader4 1677; SDAG-NEXT: v_lshr_b64 v[8:9], v[16:17], v32 1678; SDAG-NEXT: v_sub_i32_e32 v26, vcc, 64, v32 1679; SDAG-NEXT: v_subrev_i32_e32 v37, vcc, 64, v32 1680; SDAG-NEXT: v_lshr_b64 v[24:25], v[0:1], v32 1681; SDAG-NEXT: v_add_i32_e32 v36, vcc, -1, v31 1682; SDAG-NEXT: s_mov_b64 s[10:11], 0 1683; SDAG-NEXT: v_mov_b32_e32 v22, 0 1684; SDAG-NEXT: v_mov_b32_e32 v23, 0 1685; SDAG-NEXT: v_mov_b32_e32 v18, 0 1686; SDAG-NEXT: v_mov_b32_e32 v19, 0 1687; SDAG-NEXT: v_lshl_b64 v[26:27], v[0:1], v26 1688; SDAG-NEXT: v_lshr_b64 v[48:49], v[0:1], v37 1689; SDAG-NEXT: v_addc_u32_e32 v37, vcc, -1, v30, vcc 1690; SDAG-NEXT: v_or_b32_e32 v9, v9, v27 1691; SDAG-NEXT: v_or_b32_e32 v8, v8, v26 1692; SDAG-NEXT: v_addc_u32_e32 v38, vcc, -1, v2, vcc 1693; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v32 1694; SDAG-NEXT: v_cndmask_b32_e64 v9, v49, v9, s[4:5] 1695; SDAG-NEXT: v_cndmask_b32_e64 v8, v48, v8, s[4:5] 1696; SDAG-NEXT: v_cndmask_b32_e64 v27, 0, v25, s[4:5] 1697; SDAG-NEXT: v_cndmask_b32_e64 v26, 0, v24, s[4:5] 1698; SDAG-NEXT: v_addc_u32_e32 v39, vcc, -1, v3, vcc 1699; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v32 1700; SDAG-NEXT: v_cndmask_b32_e32 v25, v9, v17, vcc 1701; SDAG-NEXT: v_cndmask_b32_e32 v24, v8, v16, vcc 1702; SDAG-NEXT: v_mov_b32_e32 v9, 0 1703; SDAG-NEXT: .LBB2_3: ; %udiv-do-while3 1704; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 1705; SDAG-NEXT: v_lshrrev_b32_e32 v8, 31, v21 1706; SDAG-NEXT: v_lshl_b64 v[20:21], v[20:21], 1 1707; SDAG-NEXT: v_lshl_b64 v[26:27], v[26:27], 1 1708; SDAG-NEXT: v_lshrrev_b32_e32 v48, 31, v25 1709; SDAG-NEXT: v_lshl_b64 v[24:25], v[24:25], 1 1710; SDAG-NEXT: v_lshrrev_b32_e32 v49, 31, v11 1711; SDAG-NEXT: v_lshl_b64 v[10:11], v[10:11], 1 1712; SDAG-NEXT: v_or_b32_e32 v21, v23, v21 1713; SDAG-NEXT: v_or_b32_e32 v20, v22, v20 1714; SDAG-NEXT: v_or_b32_e32 v22, v26, v48 1715; SDAG-NEXT: v_or_b32_e32 v23, v24, v49 1716; SDAG-NEXT: v_or_b32_e32 v10, v10, v8 1717; SDAG-NEXT: v_sub_i32_e32 v8, vcc, v36, v23 1718; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v37, v25, vcc 1719; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v38, v22, vcc 1720; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v39, v27, vcc 1721; SDAG-NEXT: v_ashrrev_i32_e32 v8, 31, v8 1722; SDAG-NEXT: v_and_b32_e32 v24, v8, v31 1723; SDAG-NEXT: v_and_b32_e32 v26, v8, v30 1724; SDAG-NEXT: v_and_b32_e32 v48, v8, v2 1725; SDAG-NEXT: v_and_b32_e32 v49, v8, v3 1726; SDAG-NEXT: v_and_b32_e32 v8, 1, v8 1727; SDAG-NEXT: v_sub_i32_e32 v24, vcc, v23, v24 1728; SDAG-NEXT: v_subb_u32_e32 v25, vcc, v25, v26, vcc 1729; SDAG-NEXT: v_subb_u32_e32 v26, vcc, v22, v48, vcc 1730; SDAG-NEXT: v_subb_u32_e32 v27, vcc, v27, v49, vcc 1731; SDAG-NEXT: v_add_i32_e32 v32, vcc, -1, v32 1732; SDAG-NEXT: v_addc_u32_e32 v33, vcc, -1, v33, vcc 1733; SDAG-NEXT: v_addc_u32_e32 v34, vcc, -1, v34, vcc 1734; SDAG-NEXT: v_addc_u32_e32 v35, vcc, -1, v35, vcc 1735; SDAG-NEXT: v_or_b32_e32 v22, v32, v34 1736; SDAG-NEXT: v_or_b32_e32 v23, v33, v35 1737; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[22:23] 1738; SDAG-NEXT: v_or_b32_e32 v11, v19, v11 1739; SDAG-NEXT: s_or_b64 s[10:11], vcc, s[10:11] 1740; SDAG-NEXT: v_or_b32_e32 v10, v18, v10 1741; SDAG-NEXT: v_mov_b32_e32 v23, v9 1742; SDAG-NEXT: v_mov_b32_e32 v22, v8 1743; SDAG-NEXT: s_andn2_b64 exec, exec, s[10:11] 1744; SDAG-NEXT: s_cbranch_execnz .LBB2_3 1745; SDAG-NEXT: ; %bb.4: ; %Flow13 1746; SDAG-NEXT: s_or_b64 exec, exec, s[10:11] 1747; SDAG-NEXT: .LBB2_5: ; %Flow14 1748; SDAG-NEXT: s_or_b64 exec, exec, s[8:9] 1749; SDAG-NEXT: v_lshl_b64 v[10:11], v[10:11], 1 1750; SDAG-NEXT: v_lshrrev_b32_e32 v22, 31, v21 1751; SDAG-NEXT: v_or_b32_e32 v10, v10, v22 1752; SDAG-NEXT: v_lshl_b64 v[20:21], v[20:21], 1 1753; SDAG-NEXT: v_or_b32_e32 v35, v19, v11 1754; SDAG-NEXT: v_or_b32_e32 v32, v18, v10 1755; SDAG-NEXT: v_or_b32_e32 v27, v9, v21 1756; SDAG-NEXT: v_or_b32_e32 v33, v8, v20 1757; SDAG-NEXT: .LBB2_6: ; %Flow16 1758; SDAG-NEXT: s_or_b64 exec, exec, s[6:7] 1759; SDAG-NEXT: v_ashrrev_i32_e32 v26, 31, v7 1760; SDAG-NEXT: v_sub_i32_e32 v8, vcc, 0, v4 1761; SDAG-NEXT: v_mov_b32_e32 v18, 0 1762; SDAG-NEXT: s_mov_b64 s[10:11], 0x7f 1763; SDAG-NEXT: v_mov_b32_e32 v34, v26 1764; SDAG-NEXT: v_subb_u32_e32 v9, vcc, 0, v5, vcc 1765; SDAG-NEXT: v_subb_u32_e32 v10, vcc, 0, v6, vcc 1766; SDAG-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[6:7] 1767; SDAG-NEXT: v_cndmask_b32_e64 v9, v5, v9, s[4:5] 1768; SDAG-NEXT: v_cndmask_b32_e64 v8, v4, v8, s[4:5] 1769; SDAG-NEXT: v_subb_u32_e32 v5, vcc, 0, v7, vcc 1770; SDAG-NEXT: v_cndmask_b32_e64 v4, v6, v10, s[4:5] 1771; SDAG-NEXT: v_ffbh_u32_e32 v10, v8 1772; SDAG-NEXT: v_ffbh_u32_e32 v11, v9 1773; SDAG-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[4:5] 1774; SDAG-NEXT: v_sub_i32_e32 v19, vcc, 0, v12 1775; SDAG-NEXT: v_or_b32_e32 v6, v8, v4 1776; SDAG-NEXT: v_ffbh_u32_e32 v20, v4 1777; SDAG-NEXT: v_add_i32_e64 v10, s[4:5], 32, v10 1778; SDAG-NEXT: v_subb_u32_e32 v21, vcc, 0, v13, vcc 1779; SDAG-NEXT: v_or_b32_e32 v7, v9, v5 1780; SDAG-NEXT: v_add_i32_e64 v20, s[4:5], 32, v20 1781; SDAG-NEXT: v_ffbh_u32_e32 v22, v5 1782; SDAG-NEXT: v_min_u32_e32 v10, v10, v11 1783; SDAG-NEXT: v_subb_u32_e32 v11, vcc, 0, v14, vcc 1784; SDAG-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[14:15] 1785; SDAG-NEXT: v_cndmask_b32_e64 v36, v13, v21, s[4:5] 1786; SDAG-NEXT: v_cndmask_b32_e64 v37, v12, v19, s[4:5] 1787; SDAG-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[6:7] 1788; SDAG-NEXT: v_min_u32_e32 v7, v20, v22 1789; SDAG-NEXT: v_add_i32_e64 v10, s[8:9], 64, v10 1790; SDAG-NEXT: v_addc_u32_e64 v12, s[8:9], 0, 0, s[8:9] 1791; SDAG-NEXT: v_subb_u32_e32 v13, vcc, 0, v15, vcc 1792; SDAG-NEXT: v_cndmask_b32_e64 v6, v14, v11, s[4:5] 1793; SDAG-NEXT: v_ffbh_u32_e32 v11, v37 1794; SDAG-NEXT: v_ffbh_u32_e32 v14, v36 1795; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] 1796; SDAG-NEXT: v_cndmask_b32_e64 v12, v12, 0, vcc 1797; SDAG-NEXT: v_cndmask_b32_e32 v19, v10, v7, vcc 1798; SDAG-NEXT: v_cndmask_b32_e64 v7, v15, v13, s[4:5] 1799; SDAG-NEXT: v_or_b32_e32 v10, v37, v6 1800; SDAG-NEXT: v_ffbh_u32_e32 v13, v6 1801; SDAG-NEXT: v_add_i32_e32 v15, vcc, 32, v11 1802; SDAG-NEXT: v_or_b32_e32 v11, v36, v7 1803; SDAG-NEXT: v_add_i32_e32 v13, vcc, 32, v13 1804; SDAG-NEXT: v_ffbh_u32_e32 v20, v7 1805; SDAG-NEXT: v_min_u32_e32 v14, v15, v14 1806; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] 1807; SDAG-NEXT: v_min_u32_e32 v10, v13, v20 1808; SDAG-NEXT: v_add_i32_e64 v11, s[4:5], 64, v14 1809; SDAG-NEXT: v_addc_u32_e64 v13, s[4:5], 0, 0, s[4:5] 1810; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[6:7] 1811; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] 1812; SDAG-NEXT: v_cndmask_b32_e64 v13, v13, 0, vcc 1813; SDAG-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc 1814; SDAG-NEXT: v_sub_i32_e32 v10, vcc, v10, v19 1815; SDAG-NEXT: v_subb_u32_e32 v11, vcc, v13, v12, vcc 1816; SDAG-NEXT: v_xor_b32_e32 v14, 0x7f, v10 1817; SDAG-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v18, vcc 1818; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[10:11], v[10:11] 1819; SDAG-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] 1820; SDAG-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v18, vcc 1821; SDAG-NEXT: v_or_b32_e32 v14, v14, v12 1822; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[12:13] 1823; SDAG-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 1824; SDAG-NEXT: v_or_b32_e32 v15, v11, v13 1825; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[12:13] 1826; SDAG-NEXT: v_cndmask_b32_e32 v18, v18, v19, vcc 1827; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15] 1828; SDAG-NEXT: v_and_b32_e32 v14, 1, v18 1829; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v14 1830; SDAG-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] 1831; SDAG-NEXT: v_cndmask_b32_e64 v19, v5, 0, s[4:5] 1832; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1 1833; SDAG-NEXT: v_cndmask_b32_e64 v18, v4, 0, s[4:5] 1834; SDAG-NEXT: v_cndmask_b32_e64 v15, v9, 0, s[4:5] 1835; SDAG-NEXT: v_cndmask_b32_e64 v14, v8, 0, s[4:5] 1836; SDAG-NEXT: s_and_b64 s[4:5], s[6:7], vcc 1837; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] 1838; SDAG-NEXT: s_cbranch_execz .LBB2_12 1839; SDAG-NEXT: ; %bb.7: ; %udiv-bb1 1840; SDAG-NEXT: v_add_i32_e32 v38, vcc, 1, v10 1841; SDAG-NEXT: v_sub_i32_e64 v18, s[4:5], 63, v10 1842; SDAG-NEXT: v_mov_b32_e32 v14, 0 1843; SDAG-NEXT: v_mov_b32_e32 v15, 0 1844; SDAG-NEXT: v_addc_u32_e32 v39, vcc, 0, v11, vcc 1845; SDAG-NEXT: v_lshl_b64 v[18:19], v[8:9], v18 1846; SDAG-NEXT: v_addc_u32_e32 v48, vcc, 0, v12, vcc 1847; SDAG-NEXT: v_addc_u32_e32 v49, vcc, 0, v13, vcc 1848; SDAG-NEXT: v_or_b32_e32 v11, v38, v48 1849; SDAG-NEXT: v_sub_i32_e32 v13, vcc, 0x7f, v10 1850; SDAG-NEXT: v_or_b32_e32 v12, v39, v49 1851; SDAG-NEXT: v_lshl_b64 v[20:21], v[4:5], v13 1852; SDAG-NEXT: v_sub_i32_e32 v10, vcc, 64, v13 1853; SDAG-NEXT: v_lshl_b64 v[22:23], v[8:9], v13 1854; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[11:12] 1855; SDAG-NEXT: v_lshr_b64 v[10:11], v[8:9], v10 1856; SDAG-NEXT: v_or_b32_e32 v11, v21, v11 1857; SDAG-NEXT: v_or_b32_e32 v10, v20, v10 1858; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v13 1859; SDAG-NEXT: v_cndmask_b32_e64 v12, v19, v11, s[4:5] 1860; SDAG-NEXT: v_cndmask_b32_e64 v18, v18, v10, s[4:5] 1861; SDAG-NEXT: v_cndmask_b32_e64 v11, 0, v23, s[4:5] 1862; SDAG-NEXT: v_cndmask_b32_e64 v10, 0, v22, s[4:5] 1863; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v13 1864; SDAG-NEXT: v_cndmask_b32_e64 v13, v12, v5, s[4:5] 1865; SDAG-NEXT: v_cndmask_b32_e64 v12, v18, v4, s[4:5] 1866; SDAG-NEXT: v_mov_b32_e32 v18, 0 1867; SDAG-NEXT: v_mov_b32_e32 v19, 0 1868; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc 1869; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5] 1870; SDAG-NEXT: s_cbranch_execz .LBB2_11 1871; SDAG-NEXT: ; %bb.8: ; %udiv-preheader 1872; SDAG-NEXT: v_lshr_b64 v[14:15], v[8:9], v38 1873; SDAG-NEXT: v_sub_i32_e32 v24, vcc, 64, v38 1874; SDAG-NEXT: v_subrev_i32_e32 v51, vcc, 64, v38 1875; SDAG-NEXT: v_lshr_b64 v[22:23], v[4:5], v38 1876; SDAG-NEXT: v_add_i32_e32 v50, vcc, -1, v37 1877; SDAG-NEXT: s_mov_b64 s[10:11], 0 1878; SDAG-NEXT: v_mov_b32_e32 v20, 0 1879; SDAG-NEXT: v_mov_b32_e32 v21, 0 1880; SDAG-NEXT: v_mov_b32_e32 v18, 0 1881; SDAG-NEXT: v_mov_b32_e32 v19, 0 1882; SDAG-NEXT: v_lshl_b64 v[24:25], v[4:5], v24 1883; SDAG-NEXT: v_lshr_b64 v[53:54], v[4:5], v51 1884; SDAG-NEXT: v_addc_u32_e32 v51, vcc, -1, v36, vcc 1885; SDAG-NEXT: v_or_b32_e32 v15, v15, v25 1886; SDAG-NEXT: v_or_b32_e32 v14, v14, v24 1887; SDAG-NEXT: v_addc_u32_e32 v52, vcc, -1, v6, vcc 1888; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v38 1889; SDAG-NEXT: v_cndmask_b32_e64 v15, v54, v15, s[4:5] 1890; SDAG-NEXT: v_cndmask_b32_e64 v14, v53, v14, s[4:5] 1891; SDAG-NEXT: v_cndmask_b32_e64 v25, 0, v23, s[4:5] 1892; SDAG-NEXT: v_cndmask_b32_e64 v24, 0, v22, s[4:5] 1893; SDAG-NEXT: v_addc_u32_e32 v53, vcc, -1, v7, vcc 1894; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v38 1895; SDAG-NEXT: v_cndmask_b32_e32 v23, v15, v9, vcc 1896; SDAG-NEXT: v_cndmask_b32_e32 v22, v14, v8, vcc 1897; SDAG-NEXT: v_mov_b32_e32 v15, 0 1898; SDAG-NEXT: .LBB2_9: ; %udiv-do-while 1899; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 1900; SDAG-NEXT: v_lshl_b64 v[24:25], v[24:25], 1 1901; SDAG-NEXT: v_lshrrev_b32_e32 v14, 31, v23 1902; SDAG-NEXT: v_lshl_b64 v[22:23], v[22:23], 1 1903; SDAG-NEXT: v_lshrrev_b32_e32 v54, 31, v13 1904; SDAG-NEXT: v_lshl_b64 v[12:13], v[12:13], 1 1905; SDAG-NEXT: v_lshrrev_b32_e32 v55, 31, v11 1906; SDAG-NEXT: v_lshl_b64 v[10:11], v[10:11], 1 1907; SDAG-NEXT: v_or_b32_e32 v24, v24, v14 1908; SDAG-NEXT: v_or_b32_e32 v22, v22, v54 1909; SDAG-NEXT: v_or_b32_e32 v12, v12, v55 1910; SDAG-NEXT: v_or_b32_e32 v13, v19, v13 1911; SDAG-NEXT: v_or_b32_e32 v11, v21, v11 1912; SDAG-NEXT: v_or_b32_e32 v12, v18, v12 1913; SDAG-NEXT: v_sub_i32_e32 v14, vcc, v50, v22 1914; SDAG-NEXT: v_subb_u32_e32 v14, vcc, v51, v23, vcc 1915; SDAG-NEXT: v_subb_u32_e32 v14, vcc, v52, v24, vcc 1916; SDAG-NEXT: v_subb_u32_e32 v14, vcc, v53, v25, vcc 1917; SDAG-NEXT: v_ashrrev_i32_e32 v21, 31, v14 1918; SDAG-NEXT: v_and_b32_e32 v14, 1, v21 1919; SDAG-NEXT: v_and_b32_e32 v54, v21, v7 1920; SDAG-NEXT: v_and_b32_e32 v55, v21, v6 1921; SDAG-NEXT: v_and_b32_e32 v40, v21, v36 1922; SDAG-NEXT: v_and_b32_e32 v21, v21, v37 1923; SDAG-NEXT: v_sub_i32_e32 v22, vcc, v22, v21 1924; SDAG-NEXT: v_subb_u32_e32 v23, vcc, v23, v40, vcc 1925; SDAG-NEXT: v_subb_u32_e32 v24, vcc, v24, v55, vcc 1926; SDAG-NEXT: v_subb_u32_e32 v25, vcc, v25, v54, vcc 1927; SDAG-NEXT: v_add_i32_e32 v38, vcc, -1, v38 1928; SDAG-NEXT: v_addc_u32_e32 v39, vcc, -1, v39, vcc 1929; SDAG-NEXT: v_addc_u32_e32 v48, vcc, -1, v48, vcc 1930; SDAG-NEXT: v_addc_u32_e32 v49, vcc, -1, v49, vcc 1931; SDAG-NEXT: v_or_b32_e32 v55, v39, v49 1932; SDAG-NEXT: v_or_b32_e32 v54, v38, v48 1933; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[54:55] 1934; SDAG-NEXT: s_or_b64 s[10:11], vcc, s[10:11] 1935; SDAG-NEXT: v_or_b32_e32 v10, v20, v10 1936; SDAG-NEXT: v_mov_b32_e32 v21, v15 1937; SDAG-NEXT: v_mov_b32_e32 v20, v14 1938; SDAG-NEXT: s_andn2_b64 exec, exec, s[10:11] 1939; SDAG-NEXT: s_cbranch_execnz .LBB2_9 1940; SDAG-NEXT: ; %bb.10: ; %Flow 1941; SDAG-NEXT: s_or_b64 exec, exec, s[10:11] 1942; SDAG-NEXT: .LBB2_11: ; %Flow11 1943; SDAG-NEXT: s_or_b64 exec, exec, s[8:9] 1944; SDAG-NEXT: v_lshl_b64 v[12:13], v[12:13], 1 1945; SDAG-NEXT: v_lshrrev_b32_e32 v20, 31, v11 1946; SDAG-NEXT: v_lshl_b64 v[10:11], v[10:11], 1 1947; SDAG-NEXT: v_or_b32_e32 v12, v12, v20 1948; SDAG-NEXT: v_or_b32_e32 v19, v19, v13 1949; SDAG-NEXT: v_or_b32_e32 v15, v15, v11 1950; SDAG-NEXT: v_or_b32_e32 v18, v18, v12 1951; SDAG-NEXT: v_or_b32_e32 v14, v14, v10 1952; SDAG-NEXT: .LBB2_12: ; %Flow12 1953; SDAG-NEXT: s_or_b64 exec, exec, s[6:7] 1954; SDAG-NEXT: v_mul_lo_u32 v12, v33, v3 1955; SDAG-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v33, v2, 0 1956; SDAG-NEXT: v_mul_lo_u32 v24, v27, v2 1957; SDAG-NEXT: v_mul_lo_u32 v35, v35, v31 1958; SDAG-NEXT: v_mul_lo_u32 v38, v32, v30 1959; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v31, v33, 0 1960; SDAG-NEXT: v_mov_b32_e32 v13, 0 1961; SDAG-NEXT: v_mul_lo_u32 v25, v14, v7 1962; SDAG-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v14, v6, 0 1963; SDAG-NEXT: v_mul_lo_u32 v39, v15, v6 1964; SDAG-NEXT: v_mul_lo_u32 v19, v19, v37 1965; SDAG-NEXT: v_mul_lo_u32 v48, v18, v36 1966; SDAG-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v37, v14, 0 1967; SDAG-NEXT: v_add_i32_e32 v11, vcc, v11, v12 1968; SDAG-NEXT: v_mov_b32_e32 v12, v3 1969; SDAG-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v30, v33, v[12:13] 1970; SDAG-NEXT: v_sub_i32_e32 v12, vcc, v16, v2 1971; SDAG-NEXT: v_add_i32_e64 v16, s[4:5], v21, v25 1972; SDAG-NEXT: v_add_i32_e64 v11, s[4:5], v11, v24 1973; SDAG-NEXT: v_mov_b32_e32 v24, v23 1974; SDAG-NEXT: v_mov_b32_e32 v23, v13 1975; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v31, v27, v[22:23] 1976; SDAG-NEXT: v_xor_b32_e32 v33, v12, v28 1977; SDAG-NEXT: v_add_i32_e64 v21, s[4:5], v16, v39 1978; SDAG-NEXT: v_mov_b32_e32 v12, v7 1979; SDAG-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v36, v14, v[12:13] 1980; SDAG-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v32, v31, v[10:11] 1981; SDAG-NEXT: v_add_i32_e64 v24, s[4:5], v24, v3 1982; SDAG-NEXT: v_addc_u32_e64 v25, s[4:5], 0, 0, s[4:5] 1983; SDAG-NEXT: v_subb_u32_e32 v7, vcc, v17, v2, vcc 1984; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v18, v37, v[20:21] 1985; SDAG-NEXT: v_mov_b32_e32 v14, v23 1986; SDAG-NEXT: v_mov_b32_e32 v23, v13 1987; SDAG-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v37, v15, v[22:23] 1988; SDAG-NEXT: v_add_i32_e64 v11, s[4:5], v35, v11 1989; SDAG-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v30, v27, v[24:25] 1990; SDAG-NEXT: v_xor_b32_e32 v7, v7, v29 1991; SDAG-NEXT: v_add_i32_e64 v3, s[4:5], v19, v3 1992; SDAG-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 1993; SDAG-NEXT: v_addc_u32_e64 v14, s[4:5], 0, 0, s[4:5] 1994; SDAG-NEXT: v_mov_b32_e32 v18, v12 1995; SDAG-NEXT: v_add_i32_e64 v19, s[4:5], v38, v11 1996; SDAG-NEXT: v_add_i32_e64 v3, s[4:5], v48, v3 1997; SDAG-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v36, v15, v[13:14] 1998; SDAG-NEXT: v_add_i32_e64 v10, s[4:5], v16, v10 1999; SDAG-NEXT: v_addc_u32_e64 v13, s[4:5], v17, v19, s[4:5] 2000; SDAG-NEXT: v_subb_u32_e32 v0, vcc, v0, v10, vcc 2001; SDAG-NEXT: v_add_i32_e64 v10, s[4:5], v11, v2 2002; SDAG-NEXT: v_addc_u32_e64 v11, s[4:5], v12, v3, s[4:5] 2003; SDAG-NEXT: v_subb_u32_e32 v1, vcc, v1, v13, vcc 2004; SDAG-NEXT: v_xor_b32_e32 v2, v0, v28 2005; SDAG-NEXT: v_xor_b32_e32 v3, v1, v29 2006; SDAG-NEXT: v_sub_i32_e32 v0, vcc, v33, v28 2007; SDAG-NEXT: v_subb_u32_e32 v1, vcc, v7, v29, vcc 2008; SDAG-NEXT: v_subb_u32_e32 v2, vcc, v2, v28, vcc 2009; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v3, v29, vcc 2010; SDAG-NEXT: v_sub_i32_e32 v6, vcc, v8, v6 2011; SDAG-NEXT: v_subb_u32_e32 v7, vcc, v9, v18, vcc 2012; SDAG-NEXT: v_xor_b32_e32 v6, v6, v26 2013; SDAG-NEXT: v_subb_u32_e32 v4, vcc, v4, v10, vcc 2014; SDAG-NEXT: v_xor_b32_e32 v7, v7, v34 2015; SDAG-NEXT: v_subb_u32_e32 v5, vcc, v5, v11, vcc 2016; SDAG-NEXT: v_xor_b32_e32 v8, v4, v26 2017; SDAG-NEXT: v_xor_b32_e32 v9, v5, v34 2018; SDAG-NEXT: v_sub_i32_e32 v4, vcc, v6, v26 2019; SDAG-NEXT: v_subb_u32_e32 v5, vcc, v7, v34, vcc 2020; SDAG-NEXT: v_subb_u32_e32 v6, vcc, v8, v26, vcc 2021; SDAG-NEXT: v_subb_u32_e32 v7, vcc, v9, v34, vcc 2022; SDAG-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload 2023; SDAG-NEXT: s_waitcnt vmcnt(0) 2024; SDAG-NEXT: s_setpc_b64 s[30:31] 2025; 2026; GISEL-LABEL: v_srem_v2i128_vv: 2027; GISEL: ; %bb.0: ; %_udiv-special-cases_udiv-special-cases 2028; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2029; GISEL-NEXT: v_ashrrev_i32_e32 v28, 31, v3 2030; GISEL-NEXT: v_ashrrev_i32_e32 v18, 31, v11 2031; GISEL-NEXT: v_mov_b32_e32 v19, 0x7f 2032; GISEL-NEXT: v_mov_b32_e32 v20, 0 2033; GISEL-NEXT: s_mov_b64 s[8:9], 0 2034; GISEL-NEXT: v_xor_b32_e32 v0, v0, v28 2035; GISEL-NEXT: v_xor_b32_e32 v1, v1, v28 2036; GISEL-NEXT: v_xor_b32_e32 v2, v2, v28 2037; GISEL-NEXT: v_xor_b32_e32 v3, v3, v28 2038; GISEL-NEXT: v_xor_b32_e32 v8, v8, v18 2039; GISEL-NEXT: v_xor_b32_e32 v9, v9, v18 2040; GISEL-NEXT: v_xor_b32_e32 v10, v10, v18 2041; GISEL-NEXT: v_xor_b32_e32 v11, v11, v18 2042; GISEL-NEXT: v_sub_i32_e32 v16, vcc, v0, v28 2043; GISEL-NEXT: v_subb_u32_e32 v17, vcc, v1, v28, vcc 2044; GISEL-NEXT: v_sub_i32_e64 v30, s[4:5], v8, v18 2045; GISEL-NEXT: v_subb_u32_e64 v29, s[4:5], v9, v18, s[4:5] 2046; GISEL-NEXT: v_subb_u32_e32 v8, vcc, v2, v28, vcc 2047; GISEL-NEXT: v_subb_u32_e32 v9, vcc, v3, v28, vcc 2048; GISEL-NEXT: v_subb_u32_e64 v10, vcc, v10, v18, s[4:5] 2049; GISEL-NEXT: v_subb_u32_e32 v11, vcc, v11, v18, vcc 2050; GISEL-NEXT: v_ffbh_u32_e32 v18, v29 2051; GISEL-NEXT: v_ffbh_u32_e32 v21, v30 2052; GISEL-NEXT: v_ffbh_u32_e32 v22, v17 2053; GISEL-NEXT: v_ffbh_u32_e32 v23, v16 2054; GISEL-NEXT: v_or_b32_e32 v0, v30, v10 2055; GISEL-NEXT: v_or_b32_e32 v1, v29, v11 2056; GISEL-NEXT: v_or_b32_e32 v2, v16, v8 2057; GISEL-NEXT: v_or_b32_e32 v3, v17, v9 2058; GISEL-NEXT: v_add_i32_e32 v21, vcc, 32, v21 2059; GISEL-NEXT: v_add_i32_e32 v23, vcc, 32, v23 2060; GISEL-NEXT: v_ffbh_u32_e32 v24, v10 2061; GISEL-NEXT: v_ffbh_u32_e32 v25, v11 2062; GISEL-NEXT: v_ffbh_u32_e32 v26, v8 2063; GISEL-NEXT: v_ffbh_u32_e32 v27, v9 2064; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] 2065; GISEL-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[2:3] 2066; GISEL-NEXT: v_min_u32_e32 v0, v18, v21 2067; GISEL-NEXT: v_min_u32_e32 v1, v22, v23 2068; GISEL-NEXT: v_add_i32_e64 v2, s[6:7], 32, v24 2069; GISEL-NEXT: v_add_i32_e64 v3, s[6:7], 32, v26 2070; GISEL-NEXT: v_min_u32_e32 v2, v25, v2 2071; GISEL-NEXT: v_min_u32_e32 v3, v27, v3 2072; GISEL-NEXT: v_add_i32_e64 v0, s[6:7], 64, v0 2073; GISEL-NEXT: v_add_i32_e64 v1, s[6:7], 64, v1 2074; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2075; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[4:5] 2076; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] 2077; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 2078; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] 2079; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 2080; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1 2081; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, vcc 2082; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5] 2083; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5] 2084; GISEL-NEXT: v_xor_b32_e32 v18, 0x7f, v2 2085; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[19:20] 2086; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc 2087; GISEL-NEXT: v_or_b32_e32 v18, v18, v0 2088; GISEL-NEXT: v_or_b32_e32 v19, v3, v1 2089; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1] 2090; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc 2091; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] 2092; GISEL-NEXT: v_cndmask_b32_e32 v20, v22, v20, vcc 2093; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[18:19] 2094; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 2095; GISEL-NEXT: v_or_b32_e32 v19, v21, v20 2096; GISEL-NEXT: v_or_b32_e32 v18, v19, v18 2097; GISEL-NEXT: v_and_b32_e32 v19, 1, v19 2098; GISEL-NEXT: v_and_b32_e32 v18, 1, v18 2099; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 2100; GISEL-NEXT: v_cndmask_b32_e64 v31, v16, 0, vcc 2101; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v18 2102; GISEL-NEXT: v_cndmask_b32_e64 v18, v8, 0, vcc 2103; GISEL-NEXT: v_cndmask_b32_e64 v19, v9, 0, vcc 2104; GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], -1 2105; GISEL-NEXT: v_cndmask_b32_e64 v32, v17, 0, vcc 2106; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5] 2107; GISEL-NEXT: s_cbranch_execz .LBB2_6 2108; GISEL-NEXT: ; %bb.1: ; %udiv-bb15 2109; GISEL-NEXT: v_add_i32_e32 v31, vcc, 1, v2 2110; GISEL-NEXT: v_addc_u32_e64 v32, s[4:5], 0, v3, vcc 2111; GISEL-NEXT: v_sub_i32_e32 v24, vcc, 0x7f, v2 2112; GISEL-NEXT: v_not_b32_e32 v2, 63 2113; GISEL-NEXT: v_addc_u32_e64 v33, vcc, 0, v0, s[4:5] 2114; GISEL-NEXT: v_addc_u32_e32 v34, vcc, 0, v1, vcc 2115; GISEL-NEXT: v_add_i32_e64 v20, s[4:5], v24, v2 2116; GISEL-NEXT: v_sub_i32_e64 v18, s[4:5], 64, v24 2117; GISEL-NEXT: v_lshl_b64 v[0:1], v[16:17], v24 2118; GISEL-NEXT: v_lshl_b64 v[2:3], v[8:9], v24 2119; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 2120; GISEL-NEXT: v_lshr_b64 v[18:19], v[16:17], v18 2121; GISEL-NEXT: v_lshl_b64 v[22:23], v[16:17], v20 2122; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v24 2123; GISEL-NEXT: v_cndmask_b32_e32 v20, 0, v0, vcc 2124; GISEL-NEXT: v_cndmask_b32_e32 v21, 0, v1, vcc 2125; GISEL-NEXT: v_or_b32_e32 v0, v18, v2 2126; GISEL-NEXT: v_or_b32_e32 v1, v19, v3 2127; GISEL-NEXT: v_cndmask_b32_e32 v0, v22, v0, vcc 2128; GISEL-NEXT: v_cndmask_b32_e32 v1, v23, v1, vcc 2129; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24 2130; GISEL-NEXT: v_cndmask_b32_e32 v18, v0, v8, vcc 2131; GISEL-NEXT: v_cndmask_b32_e32 v19, v1, v9, vcc 2132; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9] 2133; GISEL-NEXT: v_mov_b32_e32 v0, s8 2134; GISEL-NEXT: v_mov_b32_e32 v1, s9 2135; GISEL-NEXT: v_mov_b32_e32 v2, s10 2136; GISEL-NEXT: v_mov_b32_e32 v3, s11 2137; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] 2138; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7] 2139; GISEL-NEXT: s_cbranch_execz .LBB2_5 2140; GISEL-NEXT: ; %bb.2: ; %udiv-preheader4 2141; GISEL-NEXT: v_add_i32_e32 v24, vcc, 0xffffffc0, v31 2142; GISEL-NEXT: v_sub_i32_e32 v22, vcc, 64, v31 2143; GISEL-NEXT: v_lshr_b64 v[0:1], v[8:9], v31 2144; GISEL-NEXT: v_lshr_b64 v[2:3], v[16:17], v31 2145; GISEL-NEXT: s_mov_b64 s[4:5], 0 2146; GISEL-NEXT: v_add_i32_e32 v35, vcc, -1, v30 2147; GISEL-NEXT: v_addc_u32_e32 v36, vcc, -1, v29, vcc 2148; GISEL-NEXT: v_lshl_b64 v[22:23], v[8:9], v22 2149; GISEL-NEXT: v_lshr_b64 v[24:25], v[8:9], v24 2150; GISEL-NEXT: v_addc_u32_e32 v37, vcc, -1, v10, vcc 2151; GISEL-NEXT: v_addc_u32_e32 v38, vcc, -1, v11, vcc 2152; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] 2153; GISEL-NEXT: v_or_b32_e32 v2, v2, v22 2154; GISEL-NEXT: v_or_b32_e32 v3, v3, v23 2155; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v31 2156; GISEL-NEXT: v_cndmask_b32_e32 v2, v24, v2, vcc 2157; GISEL-NEXT: v_cndmask_b32_e32 v3, v25, v3, vcc 2158; GISEL-NEXT: v_cndmask_b32_e32 v24, 0, v0, vcc 2159; GISEL-NEXT: v_cndmask_b32_e32 v25, 0, v1, vcc 2160; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v31 2161; GISEL-NEXT: v_cndmask_b32_e32 v26, v2, v16, vcc 2162; GISEL-NEXT: v_cndmask_b32_e32 v27, v3, v17, vcc 2163; GISEL-NEXT: v_mov_b32_e32 v23, 0 2164; GISEL-NEXT: v_mov_b32_e32 v0, s4 2165; GISEL-NEXT: v_mov_b32_e32 v1, s5 2166; GISEL-NEXT: v_mov_b32_e32 v2, s6 2167; GISEL-NEXT: v_mov_b32_e32 v3, s7 2168; GISEL-NEXT: .LBB2_3: ; %udiv-do-while3 2169; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 2170; GISEL-NEXT: v_lshrrev_b32_e32 v39, 31, v21 2171; GISEL-NEXT: v_lshl_b64 v[2:3], v[20:21], 1 2172; GISEL-NEXT: v_lshrrev_b32_e32 v22, 31, v27 2173; GISEL-NEXT: v_lshl_b64 v[26:27], v[26:27], 1 2174; GISEL-NEXT: v_lshl_b64 v[24:25], v[24:25], 1 2175; GISEL-NEXT: v_lshrrev_b32_e32 v48, 31, v19 2176; GISEL-NEXT: v_add_i32_e32 v31, vcc, -1, v31 2177; GISEL-NEXT: v_addc_u32_e32 v32, vcc, -1, v32, vcc 2178; GISEL-NEXT: v_lshl_b64 v[18:19], v[18:19], 1 2179; GISEL-NEXT: v_or_b32_e32 v20, v0, v2 2180; GISEL-NEXT: v_or_b32_e32 v21, v1, v3 2181; GISEL-NEXT: v_or_b32_e32 v2, v24, v22 2182; GISEL-NEXT: v_or_b32_e32 v3, v26, v48 2183; GISEL-NEXT: v_addc_u32_e32 v33, vcc, -1, v33, vcc 2184; GISEL-NEXT: v_addc_u32_e32 v34, vcc, -1, v34, vcc 2185; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v35, v3 2186; GISEL-NEXT: v_subb_u32_e32 v0, vcc, v36, v27, vcc 2187; GISEL-NEXT: v_or_b32_e32 v0, v31, v33 2188; GISEL-NEXT: v_or_b32_e32 v1, v32, v34 2189; GISEL-NEXT: v_subb_u32_e32 v22, vcc, v37, v2, vcc 2190; GISEL-NEXT: v_subb_u32_e32 v22, vcc, v38, v25, vcc 2191; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] 2192; GISEL-NEXT: v_ashrrev_i32_e32 v0, 31, v22 2193; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2194; GISEL-NEXT: v_and_b32_e32 v1, v0, v30 2195; GISEL-NEXT: v_and_b32_e32 v24, v0, v29 2196; GISEL-NEXT: v_and_b32_e32 v48, v0, v10 2197; GISEL-NEXT: v_and_b32_e32 v49, v0, v11 2198; GISEL-NEXT: v_and_b32_e32 v22, 1, v0 2199; GISEL-NEXT: v_sub_i32_e32 v26, vcc, v3, v1 2200; GISEL-NEXT: v_subb_u32_e32 v27, vcc, v27, v24, vcc 2201; GISEL-NEXT: v_subb_u32_e32 v24, vcc, v2, v48, vcc 2202; GISEL-NEXT: v_subb_u32_e32 v25, vcc, v25, v49, vcc 2203; GISEL-NEXT: v_or_b32_e32 v18, v18, v39 2204; GISEL-NEXT: v_mov_b32_e32 v0, v22 2205; GISEL-NEXT: v_mov_b32_e32 v1, v23 2206; GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5] 2207; GISEL-NEXT: s_cbranch_execnz .LBB2_3 2208; GISEL-NEXT: ; %bb.4: ; %Flow13 2209; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] 2210; GISEL-NEXT: .LBB2_5: ; %Flow14 2211; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] 2212; GISEL-NEXT: v_lshl_b64 v[2:3], v[20:21], 1 2213; GISEL-NEXT: v_lshl_b64 v[18:19], v[18:19], 1 2214; GISEL-NEXT: v_lshrrev_b32_e32 v20, 31, v21 2215; GISEL-NEXT: v_or_b32_e32 v18, v18, v20 2216; GISEL-NEXT: v_or_b32_e32 v31, v0, v2 2217; GISEL-NEXT: v_or_b32_e32 v32, v1, v3 2218; GISEL-NEXT: .LBB2_6: ; %Flow16 2219; GISEL-NEXT: s_or_b64 exec, exec, s[12:13] 2220; GISEL-NEXT: s_mov_b64 s[8:9], 0 2221; GISEL-NEXT: v_ashrrev_i32_e32 v33, 31, v7 2222; GISEL-NEXT: v_ashrrev_i32_e32 v0, 31, v15 2223; GISEL-NEXT: v_mov_b32_e32 v2, 0x7f 2224; GISEL-NEXT: v_mov_b32_e32 v3, 0 2225; GISEL-NEXT: v_xor_b32_e32 v1, v4, v33 2226; GISEL-NEXT: v_xor_b32_e32 v4, v5, v33 2227; GISEL-NEXT: v_xor_b32_e32 v5, v6, v33 2228; GISEL-NEXT: v_xor_b32_e32 v7, v7, v33 2229; GISEL-NEXT: v_xor_b32_e32 v6, v12, v0 2230; GISEL-NEXT: v_xor_b32_e32 v20, v13, v0 2231; GISEL-NEXT: v_xor_b32_e32 v14, v14, v0 2232; GISEL-NEXT: v_xor_b32_e32 v15, v15, v0 2233; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v1, v33 2234; GISEL-NEXT: v_subb_u32_e32 v13, vcc, v4, v33, vcc 2235; GISEL-NEXT: v_sub_i32_e64 v35, s[4:5], v6, v0 2236; GISEL-NEXT: v_subb_u32_e64 v34, s[4:5], v20, v0, s[4:5] 2237; GISEL-NEXT: v_subb_u32_e32 v6, vcc, v5, v33, vcc 2238; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v7, v33, vcc 2239; GISEL-NEXT: v_subb_u32_e64 v4, vcc, v14, v0, s[4:5] 2240; GISEL-NEXT: v_subb_u32_e32 v5, vcc, v15, v0, vcc 2241; GISEL-NEXT: v_ffbh_u32_e32 v20, v34 2242; GISEL-NEXT: v_ffbh_u32_e32 v21, v35 2243; GISEL-NEXT: v_ffbh_u32_e32 v22, v13 2244; GISEL-NEXT: v_ffbh_u32_e32 v23, v12 2245; GISEL-NEXT: v_or_b32_e32 v0, v35, v4 2246; GISEL-NEXT: v_or_b32_e32 v1, v34, v5 2247; GISEL-NEXT: v_or_b32_e32 v14, v12, v6 2248; GISEL-NEXT: v_or_b32_e32 v15, v13, v7 2249; GISEL-NEXT: v_add_i32_e32 v21, vcc, 32, v21 2250; GISEL-NEXT: v_ffbh_u32_e32 v24, v5 2251; GISEL-NEXT: v_ffbh_u32_e32 v25, v4 2252; GISEL-NEXT: v_add_i32_e32 v23, vcc, 32, v23 2253; GISEL-NEXT: v_ffbh_u32_e32 v26, v7 2254; GISEL-NEXT: v_ffbh_u32_e32 v27, v6 2255; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] 2256; GISEL-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[14:15] 2257; GISEL-NEXT: v_min_u32_e32 v0, v20, v21 2258; GISEL-NEXT: v_add_i32_e64 v1, s[6:7], 32, v25 2259; GISEL-NEXT: v_min_u32_e32 v14, v22, v23 2260; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], 32, v27 2261; GISEL-NEXT: v_add_i32_e64 v0, s[6:7], 64, v0 2262; GISEL-NEXT: v_min_u32_e32 v1, v24, v1 2263; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], 64, v14 2264; GISEL-NEXT: v_min_u32_e32 v15, v26, v15 2265; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2266; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[4:5] 2267; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] 2268; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 2269; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] 2270; GISEL-NEXT: v_cndmask_b32_e32 v1, v15, v14, vcc 2271; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v0, v1 2272; GISEL-NEXT: v_subb_u32_e64 v15, s[4:5], 0, 0, vcc 2273; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5] 2274; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5] 2275; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[14:15], v[2:3] 2276; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc 2277; GISEL-NEXT: v_xor_b32_e32 v2, 0x7f, v14 2278; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1] 2279; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc 2280; GISEL-NEXT: v_or_b32_e32 v2, v2, v0 2281; GISEL-NEXT: v_or_b32_e32 v3, v15, v1 2282; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] 2283; GISEL-NEXT: v_cndmask_b32_e32 v21, v22, v21, vcc 2284; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3] 2285; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2286; GISEL-NEXT: v_or_b32_e32 v3, v20, v21 2287; GISEL-NEXT: v_and_b32_e32 v20, 1, v3 2288; GISEL-NEXT: v_or_b32_e32 v2, v3, v2 2289; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v20 2290; GISEL-NEXT: v_cndmask_b32_e64 v20, v12, 0, vcc 2291; GISEL-NEXT: v_and_b32_e32 v22, 1, v2 2292; GISEL-NEXT: v_cndmask_b32_e64 v21, v13, 0, vcc 2293; GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc 2294; GISEL-NEXT: v_cndmask_b32_e64 v3, v7, 0, vcc 2295; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v22 2296; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 2297; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5] 2298; GISEL-NEXT: s_cbranch_execz .LBB2_12 2299; GISEL-NEXT: ; %bb.7: ; %udiv-bb1 2300; GISEL-NEXT: v_add_i32_e32 v36, vcc, 1, v14 2301; GISEL-NEXT: v_addc_u32_e64 v37, s[4:5], 0, v15, vcc 2302; GISEL-NEXT: v_sub_i32_e32 v24, vcc, 0x7f, v14 2303; GISEL-NEXT: v_not_b32_e32 v2, 63 2304; GISEL-NEXT: v_addc_u32_e64 v38, vcc, 0, v0, s[4:5] 2305; GISEL-NEXT: v_addc_u32_e32 v39, vcc, 0, v1, vcc 2306; GISEL-NEXT: v_add_i32_e64 v20, s[4:5], v24, v2 2307; GISEL-NEXT: v_sub_i32_e64 v14, s[4:5], 64, v24 2308; GISEL-NEXT: v_lshl_b64 v[0:1], v[12:13], v24 2309; GISEL-NEXT: v_lshl_b64 v[2:3], v[6:7], v24 2310; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 2311; GISEL-NEXT: v_lshr_b64 v[14:15], v[12:13], v14 2312; GISEL-NEXT: v_lshl_b64 v[22:23], v[12:13], v20 2313; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v24 2314; GISEL-NEXT: v_cndmask_b32_e32 v20, 0, v0, vcc 2315; GISEL-NEXT: v_cndmask_b32_e32 v21, 0, v1, vcc 2316; GISEL-NEXT: v_or_b32_e32 v0, v14, v2 2317; GISEL-NEXT: v_or_b32_e32 v1, v15, v3 2318; GISEL-NEXT: v_cndmask_b32_e32 v0, v22, v0, vcc 2319; GISEL-NEXT: v_cndmask_b32_e32 v1, v23, v1, vcc 2320; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24 2321; GISEL-NEXT: v_cndmask_b32_e32 v14, v0, v6, vcc 2322; GISEL-NEXT: v_cndmask_b32_e32 v15, v1, v7, vcc 2323; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9] 2324; GISEL-NEXT: v_mov_b32_e32 v0, s8 2325; GISEL-NEXT: v_mov_b32_e32 v1, s9 2326; GISEL-NEXT: v_mov_b32_e32 v2, s10 2327; GISEL-NEXT: v_mov_b32_e32 v3, s11 2328; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] 2329; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7] 2330; GISEL-NEXT: s_cbranch_execz .LBB2_11 2331; GISEL-NEXT: ; %bb.8: ; %udiv-preheader 2332; GISEL-NEXT: v_add_i32_e32 v24, vcc, 0xffffffc0, v36 2333; GISEL-NEXT: v_sub_i32_e32 v22, vcc, 64, v36 2334; GISEL-NEXT: v_lshr_b64 v[0:1], v[6:7], v36 2335; GISEL-NEXT: v_lshr_b64 v[2:3], v[12:13], v36 2336; GISEL-NEXT: s_mov_b64 s[4:5], 0 2337; GISEL-NEXT: v_add_i32_e32 v48, vcc, -1, v35 2338; GISEL-NEXT: v_addc_u32_e32 v49, vcc, -1, v34, vcc 2339; GISEL-NEXT: v_lshl_b64 v[22:23], v[6:7], v22 2340; GISEL-NEXT: v_lshr_b64 v[24:25], v[6:7], v24 2341; GISEL-NEXT: v_addc_u32_e32 v50, vcc, -1, v4, vcc 2342; GISEL-NEXT: v_addc_u32_e32 v51, vcc, -1, v5, vcc 2343; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] 2344; GISEL-NEXT: v_or_b32_e32 v2, v2, v22 2345; GISEL-NEXT: v_or_b32_e32 v3, v3, v23 2346; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v36 2347; GISEL-NEXT: v_cndmask_b32_e32 v2, v24, v2, vcc 2348; GISEL-NEXT: v_cndmask_b32_e32 v3, v25, v3, vcc 2349; GISEL-NEXT: v_cndmask_b32_e32 v26, 0, v0, vcc 2350; GISEL-NEXT: v_cndmask_b32_e32 v27, 0, v1, vcc 2351; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v36 2352; GISEL-NEXT: v_cndmask_b32_e32 v24, v2, v12, vcc 2353; GISEL-NEXT: v_cndmask_b32_e32 v25, v3, v13, vcc 2354; GISEL-NEXT: v_mov_b32_e32 v23, 0 2355; GISEL-NEXT: v_mov_b32_e32 v0, s4 2356; GISEL-NEXT: v_mov_b32_e32 v1, s5 2357; GISEL-NEXT: v_mov_b32_e32 v2, s6 2358; GISEL-NEXT: v_mov_b32_e32 v3, s7 2359; GISEL-NEXT: .LBB2_9: ; %udiv-do-while 2360; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 2361; GISEL-NEXT: v_lshl_b64 v[2:3], v[20:21], 1 2362; GISEL-NEXT: v_lshrrev_b32_e32 v22, 31, v21 2363; GISEL-NEXT: v_lshl_b64 v[52:53], v[24:25], 1 2364; GISEL-NEXT: v_lshl_b64 v[26:27], v[26:27], 1 2365; GISEL-NEXT: v_lshrrev_b32_e32 v24, 31, v25 2366; GISEL-NEXT: v_lshrrev_b32_e32 v25, 31, v15 2367; GISEL-NEXT: v_lshl_b64 v[14:15], v[14:15], 1 2368; GISEL-NEXT: v_add_i32_e32 v36, vcc, -1, v36 2369; GISEL-NEXT: v_addc_u32_e32 v37, vcc, -1, v37, vcc 2370; GISEL-NEXT: v_or_b32_e32 v20, v0, v2 2371; GISEL-NEXT: v_or_b32_e32 v21, v1, v3 2372; GISEL-NEXT: v_or_b32_e32 v2, v26, v24 2373; GISEL-NEXT: v_or_b32_e32 v3, v52, v25 2374; GISEL-NEXT: v_or_b32_e32 v14, v14, v22 2375; GISEL-NEXT: v_addc_u32_e32 v38, vcc, -1, v38, vcc 2376; GISEL-NEXT: v_addc_u32_e32 v39, vcc, -1, v39, vcc 2377; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v48, v3 2378; GISEL-NEXT: v_subb_u32_e32 v0, vcc, v49, v53, vcc 2379; GISEL-NEXT: v_or_b32_e32 v0, v36, v38 2380; GISEL-NEXT: v_or_b32_e32 v1, v37, v39 2381; GISEL-NEXT: v_subb_u32_e32 v22, vcc, v50, v2, vcc 2382; GISEL-NEXT: v_subb_u32_e32 v22, vcc, v51, v27, vcc 2383; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] 2384; GISEL-NEXT: v_ashrrev_i32_e32 v0, 31, v22 2385; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2386; GISEL-NEXT: v_and_b32_e32 v22, 1, v0 2387; GISEL-NEXT: v_and_b32_e32 v1, v0, v35 2388; GISEL-NEXT: v_and_b32_e32 v25, v0, v34 2389; GISEL-NEXT: v_and_b32_e32 v26, v0, v4 2390; GISEL-NEXT: v_and_b32_e32 v52, v0, v5 2391; GISEL-NEXT: v_sub_i32_e32 v24, vcc, v3, v1 2392; GISEL-NEXT: v_subb_u32_e32 v25, vcc, v53, v25, vcc 2393; GISEL-NEXT: v_mov_b32_e32 v0, v22 2394; GISEL-NEXT: v_mov_b32_e32 v1, v23 2395; GISEL-NEXT: v_subb_u32_e32 v26, vcc, v2, v26, vcc 2396; GISEL-NEXT: v_subb_u32_e32 v27, vcc, v27, v52, vcc 2397; GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5] 2398; GISEL-NEXT: s_cbranch_execnz .LBB2_9 2399; GISEL-NEXT: ; %bb.10: ; %Flow 2400; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] 2401; GISEL-NEXT: .LBB2_11: ; %Flow11 2402; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] 2403; GISEL-NEXT: v_lshl_b64 v[22:23], v[20:21], 1 2404; GISEL-NEXT: v_lshl_b64 v[2:3], v[14:15], 1 2405; GISEL-NEXT: v_lshrrev_b32_e32 v14, 31, v21 2406; GISEL-NEXT: v_or_b32_e32 v2, v2, v14 2407; GISEL-NEXT: v_or_b32_e32 v20, v0, v22 2408; GISEL-NEXT: v_or_b32_e32 v21, v1, v23 2409; GISEL-NEXT: .LBB2_12: ; %Flow12 2410; GISEL-NEXT: s_or_b64 exec, exec, s[12:13] 2411; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v30, v31, 0 2412; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v30, v18, 0 2413; GISEL-NEXT: v_mul_lo_u32 v24, v30, v19 2414; GISEL-NEXT: v_mul_lo_u32 v25, v29, v18 2415; GISEL-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v35, v20, 0 2416; GISEL-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v35, v2, 0 2417; GISEL-NEXT: v_mul_lo_u32 v26, v35, v3 2418; GISEL-NEXT: v_mul_lo_u32 v27, v34, v2 2419; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v29, v32, v[14:15] 2420; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v34, v21, v[22:23] 2421; GISEL-NEXT: v_mov_b32_e32 v22, v19 2422; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v31, v[2:3] 2423; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v4, v20, v[14:15] 2424; GISEL-NEXT: v_mad_u64_u32 v[1:2], vcc, v30, v32, v[1:2] 2425; GISEL-NEXT: v_mov_b32_e32 v23, v14 2426; GISEL-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v35, v21, v[22:23] 2427; GISEL-NEXT: v_mad_u64_u32 v[1:2], s[6:7], v29, v31, v[1:2] 2428; GISEL-NEXT: v_addc_u32_e64 v3, s[6:7], v3, v24, s[6:7] 2429; GISEL-NEXT: v_mad_u64_u32 v[22:23], s[6:7], v34, v20, v[22:23] 2430; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], v15, v26, s[6:7] 2431; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v25, vcc 2432; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v16, v0 2433; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v17, v1, vcc 2434; GISEL-NEXT: v_xor_b32_e32 v15, v0, v28 2435; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v14, v27, s[4:5] 2436; GISEL-NEXT: v_sub_i32_e64 v12, s[4:5], v12, v18 2437; GISEL-NEXT: v_subb_u32_e64 v14, s[4:5], v13, v22, s[4:5] 2438; GISEL-NEXT: v_xor_b32_e32 v16, v12, v33 2439; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[6:7], v10, v32, v[3:4] 2440; GISEL-NEXT: v_xor_b32_e32 v1, v1, v28 2441; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v4, v21, v[0:1] 2442; GISEL-NEXT: v_xor_b32_e32 v14, v14, v33 2443; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[6:7], v11, v31, v[12:13] 2444; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v15, v28 2445; GISEL-NEXT: v_subb_u32_e64 v1, s[6:7], v1, v28, s[6:7] 2446; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[8:9], v5, v20, v[3:4] 2447; GISEL-NEXT: v_sub_i32_e64 v4, s[8:9], v16, v33 2448; GISEL-NEXT: v_subb_u32_e64 v5, s[8:9], v14, v33, s[8:9] 2449; GISEL-NEXT: v_subb_u32_e32 v2, vcc, v8, v2, vcc 2450; GISEL-NEXT: v_subb_u32_e32 v8, vcc, v9, v10, vcc 2451; GISEL-NEXT: v_xor_b32_e32 v2, v2, v28 2452; GISEL-NEXT: v_subb_u32_e64 v6, vcc, v6, v23, s[4:5] 2453; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v7, v3, vcc 2454; GISEL-NEXT: v_xor_b32_e32 v6, v6, v33 2455; GISEL-NEXT: v_xor_b32_e32 v7, v8, v28 2456; GISEL-NEXT: v_xor_b32_e32 v8, v3, v33 2457; GISEL-NEXT: v_subb_u32_e64 v2, vcc, v2, v28, s[6:7] 2458; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v7, v28, vcc 2459; GISEL-NEXT: v_subb_u32_e64 v6, vcc, v6, v33, s[8:9] 2460; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v8, v33, vcc 2461; GISEL-NEXT: s_setpc_b64 s[30:31] 2462 %shl = srem <2 x i128> %lhs, %rhs 2463 ret <2 x i128> %shl 2464} 2465 2466define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { 2467; SDAG-LABEL: v_urem_v2i128_vv: 2468; SDAG: ; %bb.0: ; %_udiv-special-cases_udiv-special-cases 2469; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2470; SDAG-NEXT: v_or_b32_e32 v17, v9, v11 2471; SDAG-NEXT: v_or_b32_e32 v16, v8, v10 2472; SDAG-NEXT: v_or_b32_e32 v19, v1, v3 2473; SDAG-NEXT: v_or_b32_e32 v18, v0, v2 2474; SDAG-NEXT: v_ffbh_u32_e32 v20, v10 2475; SDAG-NEXT: v_ffbh_u32_e32 v21, v11 2476; SDAG-NEXT: v_ffbh_u32_e32 v22, v8 2477; SDAG-NEXT: v_ffbh_u32_e32 v23, v9 2478; SDAG-NEXT: v_ffbh_u32_e32 v24, v2 2479; SDAG-NEXT: v_ffbh_u32_e32 v25, v3 2480; SDAG-NEXT: v_ffbh_u32_e32 v26, v0 2481; SDAG-NEXT: v_ffbh_u32_e32 v27, v1 2482; SDAG-NEXT: v_mov_b32_e32 v28, 0 2483; SDAG-NEXT: s_mov_b64 s[8:9], 0x7f 2484; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17] 2485; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[18:19] 2486; SDAG-NEXT: v_add_i32_e64 v16, s[6:7], 32, v20 2487; SDAG-NEXT: v_add_i32_e64 v17, s[6:7], 32, v22 2488; SDAG-NEXT: v_add_i32_e64 v18, s[6:7], 32, v24 2489; SDAG-NEXT: v_add_i32_e64 v19, s[6:7], 32, v26 2490; SDAG-NEXT: v_min_u32_e32 v16, v16, v21 2491; SDAG-NEXT: v_min_u32_e32 v17, v17, v23 2492; SDAG-NEXT: v_min_u32_e32 v18, v18, v25 2493; SDAG-NEXT: v_min_u32_e32 v19, v19, v27 2494; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[4:5] 2495; SDAG-NEXT: v_add_i32_e32 v17, vcc, 64, v17 2496; SDAG-NEXT: v_addc_u32_e64 v20, s[4:5], 0, 0, vcc 2497; SDAG-NEXT: v_add_i32_e32 v19, vcc, 64, v19 2498; SDAG-NEXT: v_addc_u32_e64 v21, s[4:5], 0, 0, vcc 2499; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] 2500; SDAG-NEXT: v_cndmask_b32_e64 v20, v20, 0, vcc 2501; SDAG-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc 2502; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] 2503; SDAG-NEXT: v_cndmask_b32_e64 v17, v21, 0, vcc 2504; SDAG-NEXT: v_cndmask_b32_e32 v18, v19, v18, vcc 2505; SDAG-NEXT: v_sub_i32_e32 v18, vcc, v16, v18 2506; SDAG-NEXT: v_subb_u32_e32 v19, vcc, v20, v17, vcc 2507; SDAG-NEXT: v_xor_b32_e32 v16, 0x7f, v18 2508; SDAG-NEXT: v_subbrev_u32_e32 v20, vcc, 0, v28, vcc 2509; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[8:9], v[18:19] 2510; SDAG-NEXT: v_cndmask_b32_e64 v22, 0, 1, s[4:5] 2511; SDAG-NEXT: v_subbrev_u32_e32 v21, vcc, 0, v28, vcc 2512; SDAG-NEXT: v_or_b32_e32 v16, v16, v20 2513; SDAG-NEXT: v_or_b32_e32 v17, v19, v21 2514; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[20:21] 2515; SDAG-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc 2516; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[16:17] 2517; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[20:21] 2518; SDAG-NEXT: v_cndmask_b32_e64 v16, v23, v22, s[4:5] 2519; SDAG-NEXT: v_and_b32_e32 v16, 1, v16 2520; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v16 2521; SDAG-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] 2522; SDAG-NEXT: v_cndmask_b32_e64 v33, v3, 0, s[4:5] 2523; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1 2524; SDAG-NEXT: v_cndmask_b32_e64 v31, v2, 0, s[4:5] 2525; SDAG-NEXT: v_cndmask_b32_e64 v30, v1, 0, s[4:5] 2526; SDAG-NEXT: s_and_b64 s[8:9], s[6:7], vcc 2527; SDAG-NEXT: v_cndmask_b32_e64 v32, v0, 0, s[4:5] 2528; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[8:9] 2529; SDAG-NEXT: s_cbranch_execz .LBB3_6 2530; SDAG-NEXT: ; %bb.1: ; %udiv-bb15 2531; SDAG-NEXT: v_add_i32_e32 v30, vcc, 1, v18 2532; SDAG-NEXT: v_sub_i32_e64 v22, s[4:5], 63, v18 2533; SDAG-NEXT: v_mov_b32_e32 v16, 0 2534; SDAG-NEXT: v_mov_b32_e32 v17, 0 2535; SDAG-NEXT: v_addc_u32_e32 v31, vcc, 0, v19, vcc 2536; SDAG-NEXT: v_lshl_b64 v[22:23], v[0:1], v22 2537; SDAG-NEXT: v_addc_u32_e32 v32, vcc, 0, v20, vcc 2538; SDAG-NEXT: v_addc_u32_e32 v33, vcc, 0, v21, vcc 2539; SDAG-NEXT: v_or_b32_e32 v19, v30, v32 2540; SDAG-NEXT: v_sub_i32_e32 v21, vcc, 0x7f, v18 2541; SDAG-NEXT: v_or_b32_e32 v20, v31, v33 2542; SDAG-NEXT: v_lshl_b64 v[24:25], v[2:3], v21 2543; SDAG-NEXT: v_sub_i32_e32 v18, vcc, 64, v21 2544; SDAG-NEXT: v_lshl_b64 v[26:27], v[0:1], v21 2545; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[19:20] 2546; SDAG-NEXT: v_lshr_b64 v[18:19], v[0:1], v18 2547; SDAG-NEXT: v_or_b32_e32 v19, v25, v19 2548; SDAG-NEXT: v_or_b32_e32 v18, v24, v18 2549; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v21 2550; SDAG-NEXT: v_cndmask_b32_e64 v19, v23, v19, s[4:5] 2551; SDAG-NEXT: v_cndmask_b32_e64 v18, v22, v18, s[4:5] 2552; SDAG-NEXT: v_cndmask_b32_e64 v23, 0, v27, s[4:5] 2553; SDAG-NEXT: v_cndmask_b32_e64 v22, 0, v26, s[4:5] 2554; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v21 2555; SDAG-NEXT: v_cndmask_b32_e64 v19, v19, v3, s[4:5] 2556; SDAG-NEXT: v_cndmask_b32_e64 v18, v18, v2, s[4:5] 2557; SDAG-NEXT: v_mov_b32_e32 v20, 0 2558; SDAG-NEXT: v_mov_b32_e32 v21, 0 2559; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc 2560; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5] 2561; SDAG-NEXT: s_cbranch_execz .LBB3_5 2562; SDAG-NEXT: ; %bb.2: ; %udiv-preheader4 2563; SDAG-NEXT: v_lshr_b64 v[16:17], v[0:1], v30 2564; SDAG-NEXT: v_sub_i32_e32 v28, vcc, 64, v30 2565; SDAG-NEXT: v_subrev_i32_e32 v35, vcc, 64, v30 2566; SDAG-NEXT: v_lshr_b64 v[26:27], v[2:3], v30 2567; SDAG-NEXT: v_add_i32_e32 v34, vcc, -1, v8 2568; SDAG-NEXT: s_mov_b64 s[10:11], 0 2569; SDAG-NEXT: v_mov_b32_e32 v24, 0 2570; SDAG-NEXT: v_mov_b32_e32 v25, 0 2571; SDAG-NEXT: v_mov_b32_e32 v20, 0 2572; SDAG-NEXT: v_mov_b32_e32 v21, 0 2573; SDAG-NEXT: v_lshl_b64 v[28:29], v[2:3], v28 2574; SDAG-NEXT: v_lshr_b64 v[37:38], v[2:3], v35 2575; SDAG-NEXT: v_addc_u32_e32 v35, vcc, -1, v9, vcc 2576; SDAG-NEXT: v_or_b32_e32 v17, v17, v29 2577; SDAG-NEXT: v_or_b32_e32 v16, v16, v28 2578; SDAG-NEXT: v_addc_u32_e32 v36, vcc, -1, v10, vcc 2579; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v30 2580; SDAG-NEXT: v_cndmask_b32_e64 v17, v38, v17, s[4:5] 2581; SDAG-NEXT: v_cndmask_b32_e64 v16, v37, v16, s[4:5] 2582; SDAG-NEXT: v_cndmask_b32_e64 v29, 0, v27, s[4:5] 2583; SDAG-NEXT: v_cndmask_b32_e64 v28, 0, v26, s[4:5] 2584; SDAG-NEXT: v_addc_u32_e32 v37, vcc, -1, v11, vcc 2585; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v30 2586; SDAG-NEXT: v_cndmask_b32_e32 v27, v17, v1, vcc 2587; SDAG-NEXT: v_cndmask_b32_e32 v26, v16, v0, vcc 2588; SDAG-NEXT: v_mov_b32_e32 v17, 0 2589; SDAG-NEXT: .LBB3_3: ; %udiv-do-while3 2590; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 2591; SDAG-NEXT: v_lshrrev_b32_e32 v16, 31, v23 2592; SDAG-NEXT: v_lshl_b64 v[22:23], v[22:23], 1 2593; SDAG-NEXT: v_lshl_b64 v[28:29], v[28:29], 1 2594; SDAG-NEXT: v_lshrrev_b32_e32 v38, 31, v27 2595; SDAG-NEXT: v_lshl_b64 v[26:27], v[26:27], 1 2596; SDAG-NEXT: v_lshrrev_b32_e32 v39, 31, v19 2597; SDAG-NEXT: v_lshl_b64 v[18:19], v[18:19], 1 2598; SDAG-NEXT: v_or_b32_e32 v23, v25, v23 2599; SDAG-NEXT: v_or_b32_e32 v22, v24, v22 2600; SDAG-NEXT: v_or_b32_e32 v24, v28, v38 2601; SDAG-NEXT: v_or_b32_e32 v25, v26, v39 2602; SDAG-NEXT: v_or_b32_e32 v18, v18, v16 2603; SDAG-NEXT: v_sub_i32_e32 v16, vcc, v34, v25 2604; SDAG-NEXT: v_subb_u32_e32 v16, vcc, v35, v27, vcc 2605; SDAG-NEXT: v_subb_u32_e32 v16, vcc, v36, v24, vcc 2606; SDAG-NEXT: v_subb_u32_e32 v16, vcc, v37, v29, vcc 2607; SDAG-NEXT: v_ashrrev_i32_e32 v16, 31, v16 2608; SDAG-NEXT: v_and_b32_e32 v26, v16, v8 2609; SDAG-NEXT: v_and_b32_e32 v28, v16, v9 2610; SDAG-NEXT: v_and_b32_e32 v38, v16, v10 2611; SDAG-NEXT: v_and_b32_e32 v39, v16, v11 2612; SDAG-NEXT: v_and_b32_e32 v16, 1, v16 2613; SDAG-NEXT: v_sub_i32_e32 v26, vcc, v25, v26 2614; SDAG-NEXT: v_subb_u32_e32 v27, vcc, v27, v28, vcc 2615; SDAG-NEXT: v_subb_u32_e32 v28, vcc, v24, v38, vcc 2616; SDAG-NEXT: v_subb_u32_e32 v29, vcc, v29, v39, vcc 2617; SDAG-NEXT: v_add_i32_e32 v30, vcc, -1, v30 2618; SDAG-NEXT: v_addc_u32_e32 v31, vcc, -1, v31, vcc 2619; SDAG-NEXT: v_addc_u32_e32 v32, vcc, -1, v32, vcc 2620; SDAG-NEXT: v_addc_u32_e32 v33, vcc, -1, v33, vcc 2621; SDAG-NEXT: v_or_b32_e32 v24, v30, v32 2622; SDAG-NEXT: v_or_b32_e32 v25, v31, v33 2623; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[24:25] 2624; SDAG-NEXT: v_or_b32_e32 v19, v21, v19 2625; SDAG-NEXT: s_or_b64 s[10:11], vcc, s[10:11] 2626; SDAG-NEXT: v_or_b32_e32 v18, v20, v18 2627; SDAG-NEXT: v_mov_b32_e32 v25, v17 2628; SDAG-NEXT: v_mov_b32_e32 v24, v16 2629; SDAG-NEXT: s_andn2_b64 exec, exec, s[10:11] 2630; SDAG-NEXT: s_cbranch_execnz .LBB3_3 2631; SDAG-NEXT: ; %bb.4: ; %Flow13 2632; SDAG-NEXT: s_or_b64 exec, exec, s[10:11] 2633; SDAG-NEXT: .LBB3_5: ; %Flow14 2634; SDAG-NEXT: s_or_b64 exec, exec, s[8:9] 2635; SDAG-NEXT: v_lshl_b64 v[18:19], v[18:19], 1 2636; SDAG-NEXT: v_lshrrev_b32_e32 v24, 31, v23 2637; SDAG-NEXT: v_lshl_b64 v[22:23], v[22:23], 1 2638; SDAG-NEXT: v_or_b32_e32 v18, v18, v24 2639; SDAG-NEXT: v_or_b32_e32 v33, v21, v19 2640; SDAG-NEXT: v_or_b32_e32 v30, v17, v23 2641; SDAG-NEXT: v_or_b32_e32 v31, v20, v18 2642; SDAG-NEXT: v_or_b32_e32 v32, v16, v22 2643; SDAG-NEXT: .LBB3_6: ; %Flow16 2644; SDAG-NEXT: s_or_b64 exec, exec, s[6:7] 2645; SDAG-NEXT: v_or_b32_e32 v17, v13, v15 2646; SDAG-NEXT: v_or_b32_e32 v16, v12, v14 2647; SDAG-NEXT: v_or_b32_e32 v19, v5, v7 2648; SDAG-NEXT: v_or_b32_e32 v18, v4, v6 2649; SDAG-NEXT: v_ffbh_u32_e32 v20, v14 2650; SDAG-NEXT: v_ffbh_u32_e32 v21, v15 2651; SDAG-NEXT: v_ffbh_u32_e32 v22, v12 2652; SDAG-NEXT: v_ffbh_u32_e32 v23, v13 2653; SDAG-NEXT: v_ffbh_u32_e32 v24, v6 2654; SDAG-NEXT: v_ffbh_u32_e32 v25, v7 2655; SDAG-NEXT: v_ffbh_u32_e32 v26, v4 2656; SDAG-NEXT: v_ffbh_u32_e32 v27, v5 2657; SDAG-NEXT: v_mov_b32_e32 v28, 0 2658; SDAG-NEXT: s_mov_b64 s[8:9], 0x7f 2659; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17] 2660; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[18:19] 2661; SDAG-NEXT: v_add_i32_e64 v16, s[6:7], 32, v20 2662; SDAG-NEXT: v_add_i32_e64 v17, s[6:7], 32, v22 2663; SDAG-NEXT: v_add_i32_e64 v18, s[6:7], 32, v24 2664; SDAG-NEXT: v_add_i32_e64 v19, s[6:7], 32, v26 2665; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[4:5] 2666; SDAG-NEXT: v_min_u32_e32 v16, v16, v21 2667; SDAG-NEXT: v_min_u32_e32 v17, v17, v23 2668; SDAG-NEXT: v_min_u32_e32 v18, v18, v25 2669; SDAG-NEXT: v_min_u32_e32 v19, v19, v27 2670; SDAG-NEXT: v_add_i32_e32 v17, vcc, 64, v17 2671; SDAG-NEXT: v_addc_u32_e64 v20, s[4:5], 0, 0, vcc 2672; SDAG-NEXT: v_add_i32_e32 v19, vcc, 64, v19 2673; SDAG-NEXT: v_addc_u32_e64 v21, s[4:5], 0, 0, vcc 2674; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15] 2675; SDAG-NEXT: v_cndmask_b32_e64 v20, v20, 0, vcc 2676; SDAG-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc 2677; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] 2678; SDAG-NEXT: v_cndmask_b32_e64 v17, v21, 0, vcc 2679; SDAG-NEXT: v_cndmask_b32_e32 v18, v19, v18, vcc 2680; SDAG-NEXT: v_sub_i32_e32 v16, vcc, v16, v18 2681; SDAG-NEXT: v_subb_u32_e32 v17, vcc, v20, v17, vcc 2682; SDAG-NEXT: v_xor_b32_e32 v18, 0x7f, v16 2683; SDAG-NEXT: v_subbrev_u32_e32 v20, vcc, 0, v28, vcc 2684; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[8:9], v[16:17] 2685; SDAG-NEXT: v_cndmask_b32_e64 v22, 0, 1, s[4:5] 2686; SDAG-NEXT: v_subbrev_u32_e32 v21, vcc, 0, v28, vcc 2687; SDAG-NEXT: v_or_b32_e32 v18, v18, v20 2688; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[20:21] 2689; SDAG-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc 2690; SDAG-NEXT: v_or_b32_e32 v19, v17, v21 2691; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[20:21] 2692; SDAG-NEXT: v_cndmask_b32_e32 v22, v23, v22, vcc 2693; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[18:19] 2694; SDAG-NEXT: v_and_b32_e32 v18, 1, v22 2695; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v18 2696; SDAG-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] 2697; SDAG-NEXT: v_cndmask_b32_e64 v23, v7, 0, s[4:5] 2698; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1 2699; SDAG-NEXT: v_cndmask_b32_e64 v22, v6, 0, s[4:5] 2700; SDAG-NEXT: v_cndmask_b32_e64 v19, v5, 0, s[4:5] 2701; SDAG-NEXT: v_cndmask_b32_e64 v18, v4, 0, s[4:5] 2702; SDAG-NEXT: s_and_b64 s[4:5], s[6:7], vcc 2703; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] 2704; SDAG-NEXT: s_cbranch_execz .LBB3_12 2705; SDAG-NEXT: ; %bb.7: ; %udiv-bb1 2706; SDAG-NEXT: v_add_i32_e32 v34, vcc, 1, v16 2707; SDAG-NEXT: v_sub_i32_e64 v22, s[4:5], 63, v16 2708; SDAG-NEXT: v_mov_b32_e32 v18, 0 2709; SDAG-NEXT: v_mov_b32_e32 v19, 0 2710; SDAG-NEXT: v_addc_u32_e32 v35, vcc, 0, v17, vcc 2711; SDAG-NEXT: v_lshl_b64 v[22:23], v[4:5], v22 2712; SDAG-NEXT: v_addc_u32_e32 v36, vcc, 0, v20, vcc 2713; SDAG-NEXT: v_addc_u32_e32 v37, vcc, 0, v21, vcc 2714; SDAG-NEXT: v_or_b32_e32 v20, v34, v36 2715; SDAG-NEXT: v_sub_i32_e32 v26, vcc, 0x7f, v16 2716; SDAG-NEXT: v_or_b32_e32 v21, v35, v37 2717; SDAG-NEXT: v_lshl_b64 v[16:17], v[6:7], v26 2718; SDAG-NEXT: v_sub_i32_e32 v27, vcc, 64, v26 2719; SDAG-NEXT: v_lshl_b64 v[24:25], v[4:5], v26 2720; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[20:21] 2721; SDAG-NEXT: v_lshr_b64 v[20:21], v[4:5], v27 2722; SDAG-NEXT: v_or_b32_e32 v17, v17, v21 2723; SDAG-NEXT: v_or_b32_e32 v16, v16, v20 2724; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v26 2725; SDAG-NEXT: v_cndmask_b32_e64 v17, v23, v17, s[4:5] 2726; SDAG-NEXT: v_cndmask_b32_e64 v16, v22, v16, s[4:5] 2727; SDAG-NEXT: v_cndmask_b32_e64 v21, 0, v25, s[4:5] 2728; SDAG-NEXT: v_cndmask_b32_e64 v20, 0, v24, s[4:5] 2729; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v26 2730; SDAG-NEXT: v_cndmask_b32_e64 v17, v17, v7, s[4:5] 2731; SDAG-NEXT: v_cndmask_b32_e64 v16, v16, v6, s[4:5] 2732; SDAG-NEXT: v_mov_b32_e32 v22, 0 2733; SDAG-NEXT: v_mov_b32_e32 v23, 0 2734; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc 2735; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5] 2736; SDAG-NEXT: s_cbranch_execz .LBB3_11 2737; SDAG-NEXT: ; %bb.8: ; %udiv-preheader 2738; SDAG-NEXT: v_lshr_b64 v[18:19], v[4:5], v34 2739; SDAG-NEXT: v_sub_i32_e32 v28, vcc, 64, v34 2740; SDAG-NEXT: v_subrev_i32_e32 v39, vcc, 64, v34 2741; SDAG-NEXT: v_lshr_b64 v[26:27], v[6:7], v34 2742; SDAG-NEXT: v_add_i32_e32 v38, vcc, -1, v12 2743; SDAG-NEXT: s_mov_b64 s[10:11], 0 2744; SDAG-NEXT: v_mov_b32_e32 v24, 0 2745; SDAG-NEXT: v_mov_b32_e32 v25, 0 2746; SDAG-NEXT: v_mov_b32_e32 v22, 0 2747; SDAG-NEXT: v_mov_b32_e32 v23, 0 2748; SDAG-NEXT: v_lshl_b64 v[28:29], v[6:7], v28 2749; SDAG-NEXT: v_lshr_b64 v[49:50], v[6:7], v39 2750; SDAG-NEXT: v_addc_u32_e32 v39, vcc, -1, v13, vcc 2751; SDAG-NEXT: v_or_b32_e32 v19, v19, v29 2752; SDAG-NEXT: v_or_b32_e32 v18, v18, v28 2753; SDAG-NEXT: v_addc_u32_e32 v48, vcc, -1, v14, vcc 2754; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v34 2755; SDAG-NEXT: v_cndmask_b32_e64 v19, v50, v19, s[4:5] 2756; SDAG-NEXT: v_cndmask_b32_e64 v18, v49, v18, s[4:5] 2757; SDAG-NEXT: v_cndmask_b32_e64 v29, 0, v27, s[4:5] 2758; SDAG-NEXT: v_cndmask_b32_e64 v28, 0, v26, s[4:5] 2759; SDAG-NEXT: v_addc_u32_e32 v49, vcc, -1, v15, vcc 2760; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v34 2761; SDAG-NEXT: v_cndmask_b32_e32 v27, v19, v5, vcc 2762; SDAG-NEXT: v_cndmask_b32_e32 v26, v18, v4, vcc 2763; SDAG-NEXT: v_mov_b32_e32 v19, 0 2764; SDAG-NEXT: .LBB3_9: ; %udiv-do-while 2765; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 2766; SDAG-NEXT: v_lshl_b64 v[28:29], v[28:29], 1 2767; SDAG-NEXT: v_lshrrev_b32_e32 v18, 31, v27 2768; SDAG-NEXT: v_lshl_b64 v[26:27], v[26:27], 1 2769; SDAG-NEXT: v_lshrrev_b32_e32 v50, 31, v17 2770; SDAG-NEXT: v_lshl_b64 v[16:17], v[16:17], 1 2771; SDAG-NEXT: v_lshrrev_b32_e32 v51, 31, v21 2772; SDAG-NEXT: v_lshl_b64 v[20:21], v[20:21], 1 2773; SDAG-NEXT: v_or_b32_e32 v18, v28, v18 2774; SDAG-NEXT: v_or_b32_e32 v26, v26, v50 2775; SDAG-NEXT: v_or_b32_e32 v16, v16, v51 2776; SDAG-NEXT: v_or_b32_e32 v17, v23, v17 2777; SDAG-NEXT: v_or_b32_e32 v21, v25, v21 2778; SDAG-NEXT: v_sub_i32_e32 v25, vcc, v38, v26 2779; SDAG-NEXT: v_or_b32_e32 v16, v22, v16 2780; SDAG-NEXT: v_subb_u32_e32 v25, vcc, v39, v27, vcc 2781; SDAG-NEXT: v_subb_u32_e32 v25, vcc, v48, v18, vcc 2782; SDAG-NEXT: v_subb_u32_e32 v25, vcc, v49, v29, vcc 2783; SDAG-NEXT: v_ashrrev_i32_e32 v25, 31, v25 2784; SDAG-NEXT: v_and_b32_e32 v28, v25, v12 2785; SDAG-NEXT: v_and_b32_e32 v50, v25, v13 2786; SDAG-NEXT: v_and_b32_e32 v51, v25, v14 2787; SDAG-NEXT: v_and_b32_e32 v52, v25, v15 2788; SDAG-NEXT: v_sub_i32_e32 v26, vcc, v26, v28 2789; SDAG-NEXT: v_subb_u32_e32 v27, vcc, v27, v50, vcc 2790; SDAG-NEXT: v_subb_u32_e32 v28, vcc, v18, v51, vcc 2791; SDAG-NEXT: v_subb_u32_e32 v29, vcc, v29, v52, vcc 2792; SDAG-NEXT: v_add_i32_e32 v34, vcc, -1, v34 2793; SDAG-NEXT: v_addc_u32_e32 v35, vcc, -1, v35, vcc 2794; SDAG-NEXT: v_addc_u32_e32 v36, vcc, -1, v36, vcc 2795; SDAG-NEXT: v_addc_u32_e32 v37, vcc, -1, v37, vcc 2796; SDAG-NEXT: v_or_b32_e32 v50, v34, v36 2797; SDAG-NEXT: v_or_b32_e32 v51, v35, v37 2798; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[50:51] 2799; SDAG-NEXT: v_and_b32_e32 v18, 1, v25 2800; SDAG-NEXT: s_or_b64 s[10:11], vcc, s[10:11] 2801; SDAG-NEXT: v_or_b32_e32 v20, v24, v20 2802; SDAG-NEXT: v_mov_b32_e32 v25, v19 2803; SDAG-NEXT: v_mov_b32_e32 v24, v18 2804; SDAG-NEXT: s_andn2_b64 exec, exec, s[10:11] 2805; SDAG-NEXT: s_cbranch_execnz .LBB3_9 2806; SDAG-NEXT: ; %bb.10: ; %Flow 2807; SDAG-NEXT: s_or_b64 exec, exec, s[10:11] 2808; SDAG-NEXT: .LBB3_11: ; %Flow11 2809; SDAG-NEXT: s_or_b64 exec, exec, s[8:9] 2810; SDAG-NEXT: v_lshl_b64 v[16:17], v[16:17], 1 2811; SDAG-NEXT: v_lshrrev_b32_e32 v24, 31, v21 2812; SDAG-NEXT: v_lshl_b64 v[20:21], v[20:21], 1 2813; SDAG-NEXT: v_or_b32_e32 v16, v16, v24 2814; SDAG-NEXT: v_or_b32_e32 v23, v23, v17 2815; SDAG-NEXT: v_or_b32_e32 v19, v19, v21 2816; SDAG-NEXT: v_or_b32_e32 v22, v22, v16 2817; SDAG-NEXT: v_or_b32_e32 v18, v18, v20 2818; SDAG-NEXT: .LBB3_12: ; %Flow12 2819; SDAG-NEXT: s_or_b64 exec, exec, s[6:7] 2820; SDAG-NEXT: v_mul_lo_u32 v20, v32, v11 2821; SDAG-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v32, v10, 0 2822; SDAG-NEXT: v_mul_lo_u32 v28, v30, v10 2823; SDAG-NEXT: v_mul_lo_u32 v29, v33, v8 2824; SDAG-NEXT: v_mul_lo_u32 v33, v31, v9 2825; SDAG-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v8, v32, 0 2826; SDAG-NEXT: v_mov_b32_e32 v21, 0 2827; SDAG-NEXT: v_mul_lo_u32 v34, v18, v15 2828; SDAG-NEXT: v_mad_u64_u32 v[24:25], s[4:5], v18, v14, 0 2829; SDAG-NEXT: v_mul_lo_u32 v35, v19, v14 2830; SDAG-NEXT: v_mul_lo_u32 v23, v23, v12 2831; SDAG-NEXT: v_mul_lo_u32 v36, v22, v13 2832; SDAG-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v12, v18, 0 2833; SDAG-NEXT: v_add_i32_e32 v17, vcc, v17, v20 2834; SDAG-NEXT: v_mov_b32_e32 v20, v11 2835; SDAG-NEXT: v_mad_u64_u32 v[26:27], s[4:5], v9, v32, v[20:21] 2836; SDAG-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 2837; SDAG-NEXT: v_add_i32_e64 v20, s[4:5], v25, v34 2838; SDAG-NEXT: v_add_i32_e64 v17, s[4:5], v17, v28 2839; SDAG-NEXT: v_mov_b32_e32 v28, v27 2840; SDAG-NEXT: v_mov_b32_e32 v27, v21 2841; SDAG-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v8, v30, v[26:27] 2842; SDAG-NEXT: v_add_i32_e64 v25, s[4:5], v20, v35 2843; SDAG-NEXT: v_mov_b32_e32 v20, v15 2844; SDAG-NEXT: v_mad_u64_u32 v[26:27], s[4:5], v13, v18, v[20:21] 2845; SDAG-NEXT: v_mad_u64_u32 v[15:16], s[4:5], v31, v8, v[16:17] 2846; SDAG-NEXT: v_mov_b32_e32 v8, v11 2847; SDAG-NEXT: v_add_i32_e64 v17, s[4:5], v28, v8 2848; SDAG-NEXT: v_addc_u32_e64 v18, s[4:5], 0, 0, s[4:5] 2849; SDAG-NEXT: v_mov_b32_e32 v8, v10 2850; SDAG-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc 2851; SDAG-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v22, v12, v[24:25] 2852; SDAG-NEXT: v_mov_b32_e32 v22, v27 2853; SDAG-NEXT: v_mov_b32_e32 v27, v21 2854; SDAG-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v12, v19, v[26:27] 2855; SDAG-NEXT: v_add_i32_e64 v16, s[4:5], v29, v16 2856; SDAG-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v9, v30, v[17:18] 2857; SDAG-NEXT: v_add_i32_e64 v17, s[4:5], v23, v11 2858; SDAG-NEXT: v_mov_b32_e32 v11, v21 2859; SDAG-NEXT: v_add_i32_e64 v11, s[4:5], v22, v11 2860; SDAG-NEXT: v_addc_u32_e64 v12, s[4:5], 0, 0, s[4:5] 2861; SDAG-NEXT: v_add_i32_e64 v16, s[4:5], v33, v16 2862; SDAG-NEXT: v_add_i32_e64 v17, s[4:5], v36, v17 2863; SDAG-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v13, v19, v[11:12] 2864; SDAG-NEXT: v_add_i32_e64 v8, s[4:5], v8, v15 2865; SDAG-NEXT: v_addc_u32_e64 v9, s[4:5], v9, v16, s[4:5] 2866; SDAG-NEXT: v_subb_u32_e32 v2, vcc, v2, v8, vcc 2867; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc 2868; SDAG-NEXT: v_add_i32_e32 v8, vcc, v11, v10 2869; SDAG-NEXT: v_addc_u32_e32 v9, vcc, v12, v17, vcc 2870; SDAG-NEXT: v_mov_b32_e32 v10, v20 2871; SDAG-NEXT: v_sub_i32_e32 v4, vcc, v4, v14 2872; SDAG-NEXT: v_subb_u32_e32 v5, vcc, v5, v10, vcc 2873; SDAG-NEXT: v_subb_u32_e32 v6, vcc, v6, v8, vcc 2874; SDAG-NEXT: v_subb_u32_e32 v7, vcc, v7, v9, vcc 2875; SDAG-NEXT: s_setpc_b64 s[30:31] 2876; 2877; GISEL-LABEL: v_urem_v2i128_vv: 2878; GISEL: ; %bb.0: ; %_udiv-special-cases_udiv-special-cases 2879; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2880; GISEL-NEXT: v_or_b32_e32 v16, v8, v10 2881; GISEL-NEXT: v_or_b32_e32 v17, v9, v11 2882; GISEL-NEXT: v_or_b32_e32 v18, v0, v2 2883; GISEL-NEXT: v_or_b32_e32 v19, v1, v3 2884; GISEL-NEXT: v_ffbh_u32_e32 v22, v9 2885; GISEL-NEXT: v_ffbh_u32_e32 v23, v8 2886; GISEL-NEXT: v_ffbh_u32_e32 v24, v11 2887; GISEL-NEXT: v_ffbh_u32_e32 v25, v10 2888; GISEL-NEXT: v_ffbh_u32_e32 v26, v1 2889; GISEL-NEXT: v_ffbh_u32_e32 v27, v0 2890; GISEL-NEXT: v_ffbh_u32_e32 v28, v2 2891; GISEL-NEXT: v_ffbh_u32_e32 v29, v3 2892; GISEL-NEXT: v_mov_b32_e32 v20, 0x7f 2893; GISEL-NEXT: v_mov_b32_e32 v21, 0 2894; GISEL-NEXT: s_mov_b64 s[8:9], 0 2895; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17] 2896; GISEL-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[18:19] 2897; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], 32, v23 2898; GISEL-NEXT: v_add_i32_e64 v17, s[6:7], 32, v25 2899; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], 32, v27 2900; GISEL-NEXT: v_add_i32_e64 v19, s[6:7], 32, v28 2901; GISEL-NEXT: v_min_u32_e32 v16, v22, v16 2902; GISEL-NEXT: v_min_u32_e32 v17, v24, v17 2903; GISEL-NEXT: v_min_u32_e32 v18, v26, v18 2904; GISEL-NEXT: v_min_u32_e32 v19, v29, v19 2905; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2906; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, s[4:5] 2907; GISEL-NEXT: v_add_i32_e32 v16, vcc, 64, v16 2908; GISEL-NEXT: v_add_i32_e32 v18, vcc, 64, v18 2909; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] 2910; GISEL-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc 2911; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3] 2912; GISEL-NEXT: v_cndmask_b32_e32 v17, v19, v18, vcc 2913; GISEL-NEXT: v_sub_i32_e32 v18, vcc, v16, v17 2914; GISEL-NEXT: v_subb_u32_e64 v19, s[4:5], 0, 0, vcc 2915; GISEL-NEXT: v_subb_u32_e64 v16, s[4:5], 0, 0, s[4:5] 2916; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], 0, 0, s[4:5] 2917; GISEL-NEXT: v_xor_b32_e32 v23, 0x7f, v18 2918; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[18:19], v[20:21] 2919; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc 2920; GISEL-NEXT: v_or_b32_e32 v20, v23, v16 2921; GISEL-NEXT: v_or_b32_e32 v21, v19, v17 2922; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[16:17] 2923; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc 2924; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17] 2925; GISEL-NEXT: v_cndmask_b32_e32 v23, v23, v24, vcc 2926; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[20:21] 2927; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc 2928; GISEL-NEXT: v_or_b32_e32 v21, v22, v23 2929; GISEL-NEXT: v_or_b32_e32 v20, v21, v20 2930; GISEL-NEXT: v_and_b32_e32 v21, 1, v21 2931; GISEL-NEXT: v_and_b32_e32 v20, 1, v20 2932; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21 2933; GISEL-NEXT: v_cndmask_b32_e64 v32, v0, 0, vcc 2934; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v20 2935; GISEL-NEXT: v_cndmask_b32_e64 v20, v2, 0, vcc 2936; GISEL-NEXT: v_cndmask_b32_e64 v21, v3, 0, vcc 2937; GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], -1 2938; GISEL-NEXT: v_cndmask_b32_e64 v33, v1, 0, vcc 2939; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5] 2940; GISEL-NEXT: s_cbranch_execz .LBB3_6 2941; GISEL-NEXT: ; %bb.1: ; %udiv-bb15 2942; GISEL-NEXT: v_add_i32_e32 v30, vcc, 1, v18 2943; GISEL-NEXT: v_addc_u32_e64 v31, s[4:5], 0, v19, vcc 2944; GISEL-NEXT: v_sub_i32_e32 v26, vcc, 0x7f, v18 2945; GISEL-NEXT: v_not_b32_e32 v18, 63 2946; GISEL-NEXT: v_addc_u32_e64 v32, vcc, 0, v16, s[4:5] 2947; GISEL-NEXT: v_addc_u32_e32 v33, vcc, 0, v17, vcc 2948; GISEL-NEXT: v_add_i32_e64 v22, s[4:5], v26, v18 2949; GISEL-NEXT: v_sub_i32_e64 v20, s[4:5], 64, v26 2950; GISEL-NEXT: v_lshl_b64 v[16:17], v[0:1], v26 2951; GISEL-NEXT: v_lshl_b64 v[18:19], v[2:3], v26 2952; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 2953; GISEL-NEXT: v_lshr_b64 v[20:21], v[0:1], v20 2954; GISEL-NEXT: v_lshl_b64 v[24:25], v[0:1], v22 2955; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v26 2956; GISEL-NEXT: v_cndmask_b32_e32 v22, 0, v16, vcc 2957; GISEL-NEXT: v_cndmask_b32_e32 v23, 0, v17, vcc 2958; GISEL-NEXT: v_or_b32_e32 v16, v20, v18 2959; GISEL-NEXT: v_or_b32_e32 v17, v21, v19 2960; GISEL-NEXT: v_cndmask_b32_e32 v16, v24, v16, vcc 2961; GISEL-NEXT: v_cndmask_b32_e32 v17, v25, v17, vcc 2962; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v26 2963; GISEL-NEXT: v_cndmask_b32_e32 v20, v16, v2, vcc 2964; GISEL-NEXT: v_cndmask_b32_e32 v21, v17, v3, vcc 2965; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9] 2966; GISEL-NEXT: v_mov_b32_e32 v19, s11 2967; GISEL-NEXT: v_mov_b32_e32 v18, s10 2968; GISEL-NEXT: v_mov_b32_e32 v17, s9 2969; GISEL-NEXT: v_mov_b32_e32 v16, s8 2970; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] 2971; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7] 2972; GISEL-NEXT: s_cbranch_execz .LBB3_5 2973; GISEL-NEXT: ; %bb.2: ; %udiv-preheader4 2974; GISEL-NEXT: v_add_i32_e32 v26, vcc, 0xffffffc0, v30 2975; GISEL-NEXT: v_sub_i32_e32 v24, vcc, 64, v30 2976; GISEL-NEXT: v_lshr_b64 v[16:17], v[2:3], v30 2977; GISEL-NEXT: v_lshr_b64 v[18:19], v[0:1], v30 2978; GISEL-NEXT: s_mov_b64 s[4:5], 0 2979; GISEL-NEXT: v_add_i32_e32 v34, vcc, -1, v8 2980; GISEL-NEXT: v_addc_u32_e32 v35, vcc, -1, v9, vcc 2981; GISEL-NEXT: v_lshl_b64 v[24:25], v[2:3], v24 2982; GISEL-NEXT: v_lshr_b64 v[26:27], v[2:3], v26 2983; GISEL-NEXT: v_addc_u32_e32 v36, vcc, -1, v10, vcc 2984; GISEL-NEXT: v_addc_u32_e32 v37, vcc, -1, v11, vcc 2985; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] 2986; GISEL-NEXT: v_or_b32_e32 v18, v18, v24 2987; GISEL-NEXT: v_or_b32_e32 v19, v19, v25 2988; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v30 2989; GISEL-NEXT: v_cndmask_b32_e32 v18, v26, v18, vcc 2990; GISEL-NEXT: v_cndmask_b32_e32 v19, v27, v19, vcc 2991; GISEL-NEXT: v_cndmask_b32_e32 v26, 0, v16, vcc 2992; GISEL-NEXT: v_cndmask_b32_e32 v27, 0, v17, vcc 2993; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v30 2994; GISEL-NEXT: v_cndmask_b32_e32 v28, v18, v0, vcc 2995; GISEL-NEXT: v_cndmask_b32_e32 v29, v19, v1, vcc 2996; GISEL-NEXT: v_mov_b32_e32 v25, 0 2997; GISEL-NEXT: v_mov_b32_e32 v19, s7 2998; GISEL-NEXT: v_mov_b32_e32 v18, s6 2999; GISEL-NEXT: v_mov_b32_e32 v17, s5 3000; GISEL-NEXT: v_mov_b32_e32 v16, s4 3001; GISEL-NEXT: .LBB3_3: ; %udiv-do-while3 3002; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 3003; GISEL-NEXT: v_lshrrev_b32_e32 v38, 31, v23 3004; GISEL-NEXT: v_lshl_b64 v[18:19], v[22:23], 1 3005; GISEL-NEXT: v_lshrrev_b32_e32 v24, 31, v29 3006; GISEL-NEXT: v_lshl_b64 v[28:29], v[28:29], 1 3007; GISEL-NEXT: v_lshl_b64 v[26:27], v[26:27], 1 3008; GISEL-NEXT: v_lshrrev_b32_e32 v39, 31, v21 3009; GISEL-NEXT: v_add_i32_e32 v30, vcc, -1, v30 3010; GISEL-NEXT: v_addc_u32_e32 v31, vcc, -1, v31, vcc 3011; GISEL-NEXT: v_lshl_b64 v[20:21], v[20:21], 1 3012; GISEL-NEXT: v_or_b32_e32 v22, v16, v18 3013; GISEL-NEXT: v_or_b32_e32 v23, v17, v19 3014; GISEL-NEXT: v_or_b32_e32 v18, v26, v24 3015; GISEL-NEXT: v_or_b32_e32 v19, v28, v39 3016; GISEL-NEXT: v_addc_u32_e32 v32, vcc, -1, v32, vcc 3017; GISEL-NEXT: v_addc_u32_e32 v33, vcc, -1, v33, vcc 3018; GISEL-NEXT: v_sub_i32_e32 v16, vcc, v34, v19 3019; GISEL-NEXT: v_subb_u32_e32 v16, vcc, v35, v29, vcc 3020; GISEL-NEXT: v_or_b32_e32 v16, v30, v32 3021; GISEL-NEXT: v_or_b32_e32 v17, v31, v33 3022; GISEL-NEXT: v_subb_u32_e32 v24, vcc, v36, v18, vcc 3023; GISEL-NEXT: v_subb_u32_e32 v24, vcc, v37, v27, vcc 3024; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17] 3025; GISEL-NEXT: v_ashrrev_i32_e32 v16, 31, v24 3026; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 3027; GISEL-NEXT: v_and_b32_e32 v17, v16, v8 3028; GISEL-NEXT: v_and_b32_e32 v26, v16, v9 3029; GISEL-NEXT: v_and_b32_e32 v39, v16, v10 3030; GISEL-NEXT: v_and_b32_e32 v48, v16, v11 3031; GISEL-NEXT: v_and_b32_e32 v24, 1, v16 3032; GISEL-NEXT: v_sub_i32_e32 v28, vcc, v19, v17 3033; GISEL-NEXT: v_subb_u32_e32 v29, vcc, v29, v26, vcc 3034; GISEL-NEXT: v_subb_u32_e32 v26, vcc, v18, v39, vcc 3035; GISEL-NEXT: v_subb_u32_e32 v27, vcc, v27, v48, vcc 3036; GISEL-NEXT: v_or_b32_e32 v20, v20, v38 3037; GISEL-NEXT: v_mov_b32_e32 v16, v24 3038; GISEL-NEXT: v_mov_b32_e32 v17, v25 3039; GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5] 3040; GISEL-NEXT: s_cbranch_execnz .LBB3_3 3041; GISEL-NEXT: ; %bb.4: ; %Flow13 3042; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] 3043; GISEL-NEXT: .LBB3_5: ; %Flow14 3044; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] 3045; GISEL-NEXT: v_lshl_b64 v[18:19], v[22:23], 1 3046; GISEL-NEXT: v_lshl_b64 v[20:21], v[20:21], 1 3047; GISEL-NEXT: v_lshrrev_b32_e32 v22, 31, v23 3048; GISEL-NEXT: v_or_b32_e32 v20, v20, v22 3049; GISEL-NEXT: v_or_b32_e32 v32, v16, v18 3050; GISEL-NEXT: v_or_b32_e32 v33, v17, v19 3051; GISEL-NEXT: .LBB3_6: ; %Flow16 3052; GISEL-NEXT: s_or_b64 exec, exec, s[12:13] 3053; GISEL-NEXT: s_mov_b64 s[8:9], 0 3054; GISEL-NEXT: v_or_b32_e32 v16, v12, v14 3055; GISEL-NEXT: v_or_b32_e32 v17, v13, v15 3056; GISEL-NEXT: v_or_b32_e32 v18, v4, v6 3057; GISEL-NEXT: v_or_b32_e32 v19, v5, v7 3058; GISEL-NEXT: v_ffbh_u32_e32 v22, v13 3059; GISEL-NEXT: v_ffbh_u32_e32 v23, v12 3060; GISEL-NEXT: v_ffbh_u32_e32 v26, v15 3061; GISEL-NEXT: v_ffbh_u32_e32 v27, v14 3062; GISEL-NEXT: v_ffbh_u32_e32 v28, v5 3063; GISEL-NEXT: v_ffbh_u32_e32 v29, v4 3064; GISEL-NEXT: v_ffbh_u32_e32 v30, v7 3065; GISEL-NEXT: v_ffbh_u32_e32 v31, v6 3066; GISEL-NEXT: v_mov_b32_e32 v24, 0x7f 3067; GISEL-NEXT: v_mov_b32_e32 v25, 0 3068; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17] 3069; GISEL-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[18:19] 3070; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], 32, v23 3071; GISEL-NEXT: v_add_i32_e64 v17, s[6:7], 32, v27 3072; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], 32, v29 3073; GISEL-NEXT: v_add_i32_e64 v19, s[6:7], 32, v31 3074; GISEL-NEXT: v_min_u32_e32 v16, v22, v16 3075; GISEL-NEXT: v_min_u32_e32 v17, v26, v17 3076; GISEL-NEXT: v_min_u32_e32 v18, v28, v18 3077; GISEL-NEXT: v_min_u32_e32 v19, v30, v19 3078; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 3079; GISEL-NEXT: v_cndmask_b32_e64 v26, 0, 1, s[4:5] 3080; GISEL-NEXT: v_add_i32_e32 v16, vcc, 64, v16 3081; GISEL-NEXT: v_add_i32_e32 v18, vcc, 64, v18 3082; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] 3083; GISEL-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc 3084; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] 3085; GISEL-NEXT: v_cndmask_b32_e32 v17, v19, v18, vcc 3086; GISEL-NEXT: v_sub_i32_e32 v22, vcc, v16, v17 3087; GISEL-NEXT: v_subb_u32_e64 v23, s[4:5], 0, 0, vcc 3088; GISEL-NEXT: v_subb_u32_e64 v16, s[4:5], 0, 0, s[4:5] 3089; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], 0, 0, s[4:5] 3090; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[22:23], v[24:25] 3091; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc 3092; GISEL-NEXT: v_xor_b32_e32 v18, 0x7f, v22 3093; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[16:17] 3094; GISEL-NEXT: v_cndmask_b32_e64 v25, 0, 1, vcc 3095; GISEL-NEXT: v_or_b32_e32 v18, v18, v16 3096; GISEL-NEXT: v_or_b32_e32 v19, v23, v17 3097; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17] 3098; GISEL-NEXT: v_cndmask_b32_e32 v24, v25, v24, vcc 3099; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[18:19] 3100; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 3101; GISEL-NEXT: v_or_b32_e32 v19, v26, v24 3102; GISEL-NEXT: v_and_b32_e32 v24, 1, v19 3103; GISEL-NEXT: v_or_b32_e32 v18, v19, v18 3104; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v24 3105; GISEL-NEXT: v_cndmask_b32_e64 v24, v4, 0, vcc 3106; GISEL-NEXT: v_and_b32_e32 v26, 1, v18 3107; GISEL-NEXT: v_cndmask_b32_e64 v25, v5, 0, vcc 3108; GISEL-NEXT: v_cndmask_b32_e64 v18, v6, 0, vcc 3109; GISEL-NEXT: v_cndmask_b32_e64 v19, v7, 0, vcc 3110; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 3111; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 3112; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5] 3113; GISEL-NEXT: s_cbranch_execz .LBB3_12 3114; GISEL-NEXT: ; %bb.7: ; %udiv-bb1 3115; GISEL-NEXT: v_add_i32_e32 v34, vcc, 1, v22 3116; GISEL-NEXT: v_addc_u32_e64 v35, s[4:5], 0, v23, vcc 3117; GISEL-NEXT: v_sub_i32_e32 v28, vcc, 0x7f, v22 3118; GISEL-NEXT: v_not_b32_e32 v18, 63 3119; GISEL-NEXT: v_addc_u32_e64 v36, vcc, 0, v16, s[4:5] 3120; GISEL-NEXT: v_addc_u32_e32 v37, vcc, 0, v17, vcc 3121; GISEL-NEXT: v_add_i32_e64 v24, s[4:5], v28, v18 3122; GISEL-NEXT: v_sub_i32_e64 v22, s[4:5], 64, v28 3123; GISEL-NEXT: v_lshl_b64 v[16:17], v[4:5], v28 3124; GISEL-NEXT: v_lshl_b64 v[18:19], v[6:7], v28 3125; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 3126; GISEL-NEXT: v_lshr_b64 v[22:23], v[4:5], v22 3127; GISEL-NEXT: v_lshl_b64 v[26:27], v[4:5], v24 3128; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v28 3129; GISEL-NEXT: v_cndmask_b32_e32 v24, 0, v16, vcc 3130; GISEL-NEXT: v_cndmask_b32_e32 v25, 0, v17, vcc 3131; GISEL-NEXT: v_or_b32_e32 v16, v22, v18 3132; GISEL-NEXT: v_or_b32_e32 v17, v23, v19 3133; GISEL-NEXT: v_cndmask_b32_e32 v16, v26, v16, vcc 3134; GISEL-NEXT: v_cndmask_b32_e32 v17, v27, v17, vcc 3135; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v28 3136; GISEL-NEXT: v_cndmask_b32_e32 v22, v16, v6, vcc 3137; GISEL-NEXT: v_cndmask_b32_e32 v23, v17, v7, vcc 3138; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9] 3139; GISEL-NEXT: v_mov_b32_e32 v19, s11 3140; GISEL-NEXT: v_mov_b32_e32 v18, s10 3141; GISEL-NEXT: v_mov_b32_e32 v17, s9 3142; GISEL-NEXT: v_mov_b32_e32 v16, s8 3143; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] 3144; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7] 3145; GISEL-NEXT: s_cbranch_execz .LBB3_11 3146; GISEL-NEXT: ; %bb.8: ; %udiv-preheader 3147; GISEL-NEXT: v_add_i32_e32 v28, vcc, 0xffffffc0, v34 3148; GISEL-NEXT: v_sub_i32_e32 v26, vcc, 64, v34 3149; GISEL-NEXT: v_lshr_b64 v[16:17], v[6:7], v34 3150; GISEL-NEXT: v_lshr_b64 v[18:19], v[4:5], v34 3151; GISEL-NEXT: s_mov_b64 s[4:5], 0 3152; GISEL-NEXT: v_add_i32_e32 v38, vcc, -1, v12 3153; GISEL-NEXT: v_addc_u32_e32 v39, vcc, -1, v13, vcc 3154; GISEL-NEXT: v_lshl_b64 v[26:27], v[6:7], v26 3155; GISEL-NEXT: v_lshr_b64 v[28:29], v[6:7], v28 3156; GISEL-NEXT: v_addc_u32_e32 v48, vcc, -1, v14, vcc 3157; GISEL-NEXT: v_addc_u32_e32 v49, vcc, -1, v15, vcc 3158; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] 3159; GISEL-NEXT: v_or_b32_e32 v18, v18, v26 3160; GISEL-NEXT: v_or_b32_e32 v19, v19, v27 3161; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v34 3162; GISEL-NEXT: v_cndmask_b32_e32 v18, v28, v18, vcc 3163; GISEL-NEXT: v_cndmask_b32_e32 v19, v29, v19, vcc 3164; GISEL-NEXT: v_cndmask_b32_e32 v30, 0, v16, vcc 3165; GISEL-NEXT: v_cndmask_b32_e32 v31, 0, v17, vcc 3166; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v34 3167; GISEL-NEXT: v_cndmask_b32_e32 v28, v18, v4, vcc 3168; GISEL-NEXT: v_cndmask_b32_e32 v29, v19, v5, vcc 3169; GISEL-NEXT: v_mov_b32_e32 v27, 0 3170; GISEL-NEXT: v_mov_b32_e32 v19, s7 3171; GISEL-NEXT: v_mov_b32_e32 v18, s6 3172; GISEL-NEXT: v_mov_b32_e32 v17, s5 3173; GISEL-NEXT: v_mov_b32_e32 v16, s4 3174; GISEL-NEXT: .LBB3_9: ; %udiv-do-while 3175; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 3176; GISEL-NEXT: v_lshl_b64 v[18:19], v[24:25], 1 3177; GISEL-NEXT: v_lshrrev_b32_e32 v26, 31, v25 3178; GISEL-NEXT: v_lshl_b64 v[50:51], v[28:29], 1 3179; GISEL-NEXT: v_lshl_b64 v[30:31], v[30:31], 1 3180; GISEL-NEXT: v_lshrrev_b32_e32 v28, 31, v29 3181; GISEL-NEXT: v_lshrrev_b32_e32 v29, 31, v23 3182; GISEL-NEXT: v_lshl_b64 v[22:23], v[22:23], 1 3183; GISEL-NEXT: v_add_i32_e32 v34, vcc, -1, v34 3184; GISEL-NEXT: v_addc_u32_e32 v35, vcc, -1, v35, vcc 3185; GISEL-NEXT: v_or_b32_e32 v24, v16, v18 3186; GISEL-NEXT: v_or_b32_e32 v25, v17, v19 3187; GISEL-NEXT: v_or_b32_e32 v18, v30, v28 3188; GISEL-NEXT: v_or_b32_e32 v19, v50, v29 3189; GISEL-NEXT: v_or_b32_e32 v22, v22, v26 3190; GISEL-NEXT: v_addc_u32_e32 v36, vcc, -1, v36, vcc 3191; GISEL-NEXT: v_addc_u32_e32 v37, vcc, -1, v37, vcc 3192; GISEL-NEXT: v_sub_i32_e32 v16, vcc, v38, v19 3193; GISEL-NEXT: v_subb_u32_e32 v16, vcc, v39, v51, vcc 3194; GISEL-NEXT: v_or_b32_e32 v16, v34, v36 3195; GISEL-NEXT: v_or_b32_e32 v17, v35, v37 3196; GISEL-NEXT: v_subb_u32_e32 v26, vcc, v48, v18, vcc 3197; GISEL-NEXT: v_subb_u32_e32 v26, vcc, v49, v31, vcc 3198; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17] 3199; GISEL-NEXT: v_ashrrev_i32_e32 v16, 31, v26 3200; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 3201; GISEL-NEXT: v_and_b32_e32 v26, 1, v16 3202; GISEL-NEXT: v_and_b32_e32 v17, v16, v12 3203; GISEL-NEXT: v_and_b32_e32 v29, v16, v13 3204; GISEL-NEXT: v_and_b32_e32 v30, v16, v14 3205; GISEL-NEXT: v_and_b32_e32 v50, v16, v15 3206; GISEL-NEXT: v_sub_i32_e32 v28, vcc, v19, v17 3207; GISEL-NEXT: v_subb_u32_e32 v29, vcc, v51, v29, vcc 3208; GISEL-NEXT: v_mov_b32_e32 v16, v26 3209; GISEL-NEXT: v_mov_b32_e32 v17, v27 3210; GISEL-NEXT: v_subb_u32_e32 v30, vcc, v18, v30, vcc 3211; GISEL-NEXT: v_subb_u32_e32 v31, vcc, v31, v50, vcc 3212; GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5] 3213; GISEL-NEXT: s_cbranch_execnz .LBB3_9 3214; GISEL-NEXT: ; %bb.10: ; %Flow 3215; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] 3216; GISEL-NEXT: .LBB3_11: ; %Flow11 3217; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] 3218; GISEL-NEXT: v_lshl_b64 v[26:27], v[24:25], 1 3219; GISEL-NEXT: v_lshl_b64 v[18:19], v[22:23], 1 3220; GISEL-NEXT: v_lshrrev_b32_e32 v22, 31, v25 3221; GISEL-NEXT: v_or_b32_e32 v18, v18, v22 3222; GISEL-NEXT: v_or_b32_e32 v24, v16, v26 3223; GISEL-NEXT: v_or_b32_e32 v25, v17, v27 3224; GISEL-NEXT: .LBB3_12: ; %Flow12 3225; GISEL-NEXT: s_or_b64 exec, exec, s[12:13] 3226; GISEL-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v8, v32, 0 3227; GISEL-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v8, v20, 0 3228; GISEL-NEXT: v_mul_lo_u32 v28, v8, v21 3229; GISEL-NEXT: v_mul_lo_u32 v29, v9, v20 3230; GISEL-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v12, v24, 0 3231; GISEL-NEXT: v_mad_u64_u32 v[26:27], s[4:5], v12, v18, 0 3232; GISEL-NEXT: v_mul_lo_u32 v30, v12, v19 3233; GISEL-NEXT: v_mul_lo_u32 v31, v13, v18 3234; GISEL-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v9, v33, v[22:23] 3235; GISEL-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v13, v25, v[26:27] 3236; GISEL-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v10, v32, v[18:19] 3237; GISEL-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v14, v24, v[22:23] 3238; GISEL-NEXT: v_mad_u64_u32 v[17:18], vcc, v8, v33, v[17:18] 3239; GISEL-NEXT: v_mad_u64_u32 v[21:22], s[4:5], v12, v25, v[21:22] 3240; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[6:7], v9, v32, v[17:18] 3241; GISEL-NEXT: v_addc_u32_e64 v17, s[6:7], v19, v28, s[6:7] 3242; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[6:7], v13, v24, v[21:22] 3243; GISEL-NEXT: v_addc_u32_e64 v18, s[6:7], v23, v30, s[6:7] 3244; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v17, v29, vcc 3245; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v16 3246; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc 3247; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], v18, v31, s[4:5] 3248; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], v4, v20 3249; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], v5, v12, s[4:5] 3250; GISEL-NEXT: v_mad_u64_u32 v[16:17], s[6:7], v10, v33, v[17:18] 3251; GISEL-NEXT: v_mad_u64_u32 v[18:19], s[6:7], v14, v25, v[8:9] 3252; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[6:7], v11, v32, v[16:17] 3253; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[6:7], v15, v24, v[18:19] 3254; GISEL-NEXT: v_subb_u32_e32 v2, vcc, v2, v9, vcc 3255; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc 3256; GISEL-NEXT: v_subb_u32_e64 v6, vcc, v6, v13, s[4:5] 3257; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v7, v11, vcc 3258; GISEL-NEXT: s_setpc_b64 s[30:31] 3259 %shl = urem <2 x i128> %lhs, %rhs 3260 ret <2 x i128> %shl 3261} 3262