; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s

; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
; The two RUN lines differ only in -amdgpu-codegenprepare-disable-idiv-expansion:
; the run with the option set to 1 uses the GISEL prefix and the run with it set
; to 0 uses the CGP prefix. Functions whose output is identical in both runs are
; checked under the shared CHECK prefix.

; Signed 64-bit division of two plain (non-inreg) i64 arguments.
; The expected code tests whether the OR of two 32-bit registers is non-zero
; (presumably the high halves of %num and %den -- this assumes the usual
; v[0:1]/v[2:3] argument layout, confirm against the calling convention) and
; then runs either the long 64-bit expansion (.LBB0_3) or the short sequence at
; .LBB0_4, which writes 0 to v1 before returning.
define i64 @v_sdiv_i64(i64 %num, i64 %den) {
; CHECK-LABEL: v_sdiv_i64:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v5, v1
; CHECK-NEXT: v_mov_b32_e32 v4, v0
; CHECK-NEXT: v_or_b32_e32 v1, v5, v3
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
; CHECK-NEXT: s_cbranch_execnz .LBB0_3
; CHECK-NEXT: ; %bb.1: ; %Flow
; CHECK-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
; CHECK-NEXT: s_cbranch_execnz .LBB0_4
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
; CHECK-NEXT: s_setpc_b64 s[30:31]
; CHECK-NEXT: .LBB0_3:
; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v3
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v0
; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v3, v0, vcc
; CHECK-NEXT: v_xor_b32_e32 v2, v1, v0
; CHECK-NEXT: v_xor_b32_e32 v1, v3, v0
; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v2
; CHECK-NEXT: v_cvt_f32_u32_e32 v6, v1
; CHECK-NEXT: v_sub_i32_e32 v10, vcc, 0, v2
; CHECK-NEXT: v_subb_u32_e32 v11, vcc, 0, v1, vcc
; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v6
; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3
; CHECK-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3
; CHECK-NEXT: v_mul_f32_e32 v6, 0x2f800000, v3
; CHECK-NEXT: v_trunc_f32_e32 v8, v6
; CHECK-NEXT: v_mac_f32_e32 v3, 0xcf800000, v8
; CHECK-NEXT: v_cvt_u32_f32_e32 v9, v3
; CHECK-NEXT: v_cvt_u32_f32_e32 v12, v8
; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v9, 0
; CHECK-NEXT: v_mov_b32_e32 v3, v7
; CHECK-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[3:4]
; CHECK-NEXT: v_mul_lo_u32 v3, v12, v6
; CHECK-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v9, v[7:8]
; CHECK-NEXT: v_mul_hi_u32 v8, v9, v6
; CHECK-NEXT: v_mul_hi_u32 v6, v12, v6
; CHECK-NEXT: v_mul_lo_u32 v13, v9, v7
; CHECK-NEXT: v_mul_lo_u32 v14, v12, v7
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v13
; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v8
; CHECK-NEXT: v_mul_hi_u32 v8, v9, v7
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v13, v3
; CHECK-NEXT: v_add_i32_e32 v6, vcc, v14, v6
; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8
; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v8, vcc, v13, v8
; CHECK-NEXT: v_mul_hi_u32 v7, v12, v7
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v3
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v3
; CHECK-NEXT: v_addc_u32_e32 v12, vcc, v12, v6, vcc
; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v9, 0
; CHECK-NEXT: v_mov_b32_e32 v3, v7
; CHECK-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[3:4]
; CHECK-NEXT: v_ashrrev_i32_e32 v10, 31, v5
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v10
; CHECK-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v9, v[7:8]
; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v5, v10, vcc
; CHECK-NEXT: v_xor_b32_e32 v8, v3, v10
; CHECK-NEXT: v_mul_lo_u32 v3, v12, v6
; CHECK-NEXT: v_mul_lo_u32 v5, v9, v7
; CHECK-NEXT: v_xor_b32_e32 v11, v4, v10
; CHECK-NEXT: v_mul_hi_u32 v4, v9, v6
; CHECK-NEXT: v_mul_hi_u32 v6, v12, v6
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; CHECK-NEXT: v_mul_lo_u32 v4, v12, v7
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3
; CHECK-NEXT: v_mul_hi_u32 v5, v9, v7
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
; CHECK-NEXT: v_mul_hi_u32 v6, v12, v7
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v9, v3
; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v12, v4, vcc
; CHECK-NEXT: v_mul_lo_u32 v5, v11, v3
; CHECK-NEXT: v_mul_lo_u32 v6, v8, v4
; CHECK-NEXT: v_mul_hi_u32 v7, v8, v3
; CHECK-NEXT: v_mul_hi_u32 v3, v11, v3
; CHECK-NEXT: v_mul_hi_u32 v9, v11, v4
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CHECK-NEXT: v_mul_lo_u32 v7, v11, v4
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
; CHECK-NEXT: v_mul_hi_u32 v6, v8, v4
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v7, v3
; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
; CHECK-NEXT: v_add_i32_e32 v7, vcc, v3, v5
; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v2, v7, 0
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
; CHECK-NEXT: v_add_i32_e32 v6, vcc, v9, v5
; CHECK-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v6, v[4:5]
; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v8, v3
; CHECK-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v7, v[4:5]
; CHECK-NEXT: v_subb_u32_e64 v5, s[4:5], v11, v4, vcc
; CHECK-NEXT: v_sub_i32_e64 v4, s[4:5], v11, v4
; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v1
; CHECK-NEXT: v_subb_u32_e32 v4, vcc, v4, v1, vcc
; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v2
; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v3, v2
; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v1
; CHECK-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
; CHECK-NEXT: v_cndmask_b32_e64 v5, v8, v9, s[4:5]
; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v7
; CHECK-NEXT: v_addc_u32_e32 v9, vcc, 0, v6, vcc
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1
; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1
; CHECK-NEXT: v_cndmask_b32_e32 v1, v11, v2, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v8
; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v9, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v2, vcc
; CHECK-NEXT: v_cndmask_b32_e32 v2, v9, v3, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; CHECK-NEXT: v_xor_b32_e32 v3, v10, v0
; CHECK-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; CHECK-NEXT: v_xor_b32_e32 v0, v1, v3
; CHECK-NEXT: v_xor_b32_e32 v1, v2, v3
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
; CHECK-NEXT: ; implicit-def: $vgpr2
; CHECK-NEXT: ; implicit-def: $vgpr4
; CHECK-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
; CHECK-NEXT: s_cbranch_execz .LBB0_2
; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v2
; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
; CHECK-NEXT: v_mul_lo_u32 v1, v1, v0
; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0
; CHECK-NEXT: v_mul_lo_u32 v1, v0, v2
; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v0
; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v4, v1
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v1, v2
; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v0
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
; CHECK-NEXT: s_setpc_b64 s[30:31]
  %result = sdiv i64 %num, %den
  ret i64 %result
}

; FIXME: This is a workaround for not handling the uniform VGPR case.
declare i32 @llvm.amdgcn.readfirstlane(i32)

; Signed 64-bit division of two inreg i64 arguments in an amdgpu_ps function.
; The quotient is split into two i32 halves, each half is passed through
; llvm.amdgcn.readfirstlane, and an i64 is rebuilt from the results and returned.
; The expected code ORs s[2:3] with s[4:5], masks the result with s[6:7]
; (s6 = 0, s7 = -1) and branches on whether anything is left, choosing between
; the long expansion (%bb.1) and the short sequence in %bb.4.
define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-LABEL: s_sdiv_i64:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_mov_b32 s6, 0
; CHECK-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5]
; CHECK-NEXT: s_mov_b32 s7, -1
; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7]
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[0:1], 0
; CHECK-NEXT: s_mov_b32 s0, 1
; CHECK-NEXT: s_cbranch_vccz .LBB1_2
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: s_ashr_i32 s6, s3, 31
; CHECK-NEXT: s_ashr_i32 s8, s5, 31
; CHECK-NEXT: s_add_u32 s0, s2, s6
; CHECK-NEXT: s_addc_u32 s1, s3, s6
; CHECK-NEXT: s_add_u32 s10, s4, s8
; CHECK-NEXT: s_mov_b32 s9, s8
; CHECK-NEXT: s_addc_u32 s11, s5, s8
; CHECK-NEXT: s_xor_b64 s[10:11], s[10:11], s[8:9]
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s10
; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s11
; CHECK-NEXT: s_mov_b32 s7, s6
; CHECK-NEXT: s_xor_b64 s[12:13], s[0:1], s[6:7]
; CHECK-NEXT: s_sub_u32 s3, 0, s10
; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
; CHECK-NEXT: s_subb_u32 s5, 0, s11
; CHECK-NEXT: s_xor_b64 s[6:7], s[6:7], s[8:9]
; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; CHECK-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; CHECK-NEXT: v_trunc_f32_e32 v2, v1
; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2
; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v0
; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v2
; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s3, v3, 0
; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v4, v[1:2]
; CHECK-NEXT: v_mul_hi_u32 v5, v3, v0
; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s5, v3, v[1:2]
; CHECK-NEXT: v_mul_lo_u32 v2, v4, v0
; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0
; CHECK-NEXT: v_mul_lo_u32 v6, v3, v1
; CHECK-NEXT: v_mul_lo_u32 v7, v4, v1
; CHECK-NEXT: v_mul_hi_u32 v8, v3, v1
; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v0
; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc
; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s3, v3, 0
; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v4, v[1:2]
; CHECK-NEXT: v_mul_hi_u32 v6, v3, v0
; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s5, v3, v[1:2]
; CHECK-NEXT: v_mul_lo_u32 v2, v4, v0
; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0
; CHECK-NEXT: v_mul_lo_u32 v5, v3, v1
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; CHECK-NEXT: v_mul_lo_u32 v6, v4, v1
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2
; CHECK-NEXT: v_mul_hi_u32 v5, v3, v1
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v6, v0
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v3, v0
; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc
; CHECK-NEXT: v_mul_lo_u32 v2, s13, v0
; CHECK-NEXT: v_mul_lo_u32 v3, s12, v1
; CHECK-NEXT: v_mul_hi_u32 v4, s12, v0
; CHECK-NEXT: v_mul_hi_u32 v0, s13, v0
; CHECK-NEXT: v_mul_hi_u32 v5, s13, v1
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; CHECK-NEXT: v_mul_lo_u32 v4, s13, v1
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; CHECK-NEXT: v_mul_hi_u32 v3, s12, v1
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v4, v0
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v0, v2
; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s10, v4, 0
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2
; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s10, v2, v[1:2]
; CHECK-NEXT: v_mov_b32_e32 v5, s13
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, s12, v0
; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v4, v[1:2]
; CHECK-NEXT: v_mov_b32_e32 v3, s11
; CHECK-NEXT: v_subb_u32_e64 v2, s[0:1], v5, v1, vcc
; CHECK-NEXT: v_sub_i32_e64 v1, s[0:1], s13, v1
; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v2
; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1]
; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v0
; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s10, v0
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v4
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1]
; CHECK-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v2
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s11, v1
; CHECK-NEXT: v_cndmask_b32_e64 v2, v5, v6, s[0:1]
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s10, v0
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s11, v1
; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc
; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v3
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; CHECK-NEXT: v_xor_b32_e32 v0, s6, v0
; CHECK-NEXT: s_mov_b32 s0, 0
; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0
; CHECK-NEXT: s_branch .LBB1_3
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: .LBB1_3: ; %Flow
; CHECK-NEXT: s_xor_b32 s0, s0, 1
; CHECK-NEXT: s_and_b32 s0, s0, 1
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
; CHECK-NEXT: ; %bb.4:
; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s4
; CHECK-NEXT: s_sub_i32 s0, 0, s4
; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
; CHECK-NEXT: v_mul_lo_u32 v1, s0, v0
; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: v_mul_hi_u32 v0, s2, v0
; CHECK-NEXT: v_mul_lo_u32 v1, v0, s4
; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
; CHECK-NEXT: v_sub_i32_e32 v1, vcc, s2, v1
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT: v_subrev_i32_e64 v2, s[0:1], s4, v1
; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT: .LBB1_5:
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
; CHECK-NEXT: s_mov_b32 s1, s0
; CHECK-NEXT: ; return to shader part epilog
  %result = sdiv i64 %num, %den
  %cast = bitcast i64 %result to <2 x i32>
  %elt.0 = extractelement <2 x i32> %cast, i32 0
  %elt.1 = extractelement <2 x i32> %cast, i32 1
  %res.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.0)
  %res.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.1)
  %ins.0 = insertelement <2 x i32> undef, i32 %res.0, i32 0
  ; NOTE(review): element 1 is filled with %res.0, not %res.1, so %res.1 is
  ; unused and both halves of the returned value come from the low half of the
  ; quotient. The assertions above reflect this (s1 is a copy of s0). This looks
  ; like a typo for %res.1 -- confirm intent; changing it requires regenerating
  ; the assertions with utils/update_llc_test_checks.py.
  %ins.1 = insertelement <2 x i32> %ins.0, i32 %res.0, i32 1
  %cast.back = bitcast <2 x i32> %ins.1 to i64
  ret i64 %cast.back
}

define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
; GISEL-LABEL: v_sdiv_v2i64:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_ashrrev_i32_e32 v8, 31, v5
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8
; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc
; GISEL-NEXT: v_xor_b32_e32 v10, v4, v8
; GISEL-NEXT: v_xor_b32_e32 v4, v5, v8
; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v10
; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v4
; GISEL-NEXT: v_sub_i32_e32 v15, vcc, 0, v10
; GISEL-NEXT: v_subb_u32_e32 v16, vcc, 0, v4, vcc
; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v9
; GISEL-NEXT:
v_rcp_iflag_f32_e32 v5, v5 392; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 393; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v5 394; GISEL-NEXT: v_trunc_f32_e32 v9, v9 395; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v9 396; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v5 397; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 398; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v14, 0 399; GISEL-NEXT: v_mov_b32_e32 v5, v12 400; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v15, v9, v[5:6] 401; GISEL-NEXT: v_mul_lo_u32 v5, v9, v11 402; GISEL-NEXT: v_mul_hi_u32 v17, v14, v11 403; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v16, v14, v[12:13] 404; GISEL-NEXT: v_mul_hi_u32 v11, v9, v11 405; GISEL-NEXT: v_mul_lo_u32 v13, v14, v12 406; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v13 407; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 408; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v17 409; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 410; GISEL-NEXT: v_mul_lo_u32 v17, v9, v12 411; GISEL-NEXT: v_add_i32_e32 v5, vcc, v13, v5 412; GISEL-NEXT: v_mul_hi_u32 v13, v14, v12 413; GISEL-NEXT: v_add_i32_e32 v11, vcc, v17, v11 414; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 415; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 416; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 417; GISEL-NEXT: v_add_i32_e32 v13, vcc, v17, v13 418; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12 419; GISEL-NEXT: v_add_i32_e32 v5, vcc, v11, v5 420; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 421; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11 422; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 423; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v5 424; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v9, v11, vcc 425; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v14, 0 426; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v1 427; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 428; GISEL-NEXT: v_mov_b32_e32 v5, v12 429; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v15, v17, v[5:6] 430; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v9, vcc 431; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v16, 
v14, v[12:13] 432; GISEL-NEXT: v_xor_b32_e32 v15, v0, v9 433; GISEL-NEXT: v_mul_lo_u32 v0, v17, v11 434; GISEL-NEXT: v_mul_lo_u32 v5, v14, v12 435; GISEL-NEXT: v_xor_b32_e32 v16, v1, v9 436; GISEL-NEXT: v_mul_hi_u32 v1, v14, v11 437; GISEL-NEXT: v_mul_hi_u32 v11, v17, v11 438; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v5 439; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 440; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 441; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 442; GISEL-NEXT: v_mul_lo_u32 v1, v17, v12 443; GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v0 444; GISEL-NEXT: v_mul_hi_u32 v5, v14, v12 445; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11 446; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 447; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5 448; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 449; GISEL-NEXT: v_add_i32_e32 v5, vcc, v11, v5 450; GISEL-NEXT: v_mul_hi_u32 v11, v17, v12 451; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 452; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 453; GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1 454; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 455; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0 456; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc 457; GISEL-NEXT: v_mul_lo_u32 v5, v16, v0 458; GISEL-NEXT: v_mul_lo_u32 v11, v15, v1 459; GISEL-NEXT: v_mul_hi_u32 v12, v15, v0 460; GISEL-NEXT: v_mul_hi_u32 v0, v16, v0 461; GISEL-NEXT: v_xor_b32_e32 v8, v9, v8 462; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v11 463; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 464; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v12 465; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 466; GISEL-NEXT: v_mul_lo_u32 v12, v16, v1 467; GISEL-NEXT: v_add_i32_e32 v5, vcc, v11, v5 468; GISEL-NEXT: v_mul_hi_u32 v11, v15, v1 469; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0 470; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 471; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11 472; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 473; GISEL-NEXT: v_add_i32_e32 v13, vcc, v12, v11 474; GISEL-NEXT: v_add_i32_e32 v0, 
vcc, v0, v5 475; GISEL-NEXT: v_mul_hi_u32 v1, v16, v1 476; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v10, v0, 0 477; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 478; GISEL-NEXT: v_add_i32_e32 v5, vcc, v13, v5 479; GISEL-NEXT: v_add_i32_e32 v14, vcc, v1, v5 480; GISEL-NEXT: v_mov_b32_e32 v1, v12 481; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v10, v14, v[1:2] 482; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v7 483; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v5 484; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v7, v5, vcc 485; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v4, v0, v[12:13] 486; GISEL-NEXT: v_xor_b32_e32 v7, v1, v5 487; GISEL-NEXT: v_xor_b32_e32 v6, v6, v5 488; GISEL-NEXT: v_cvt_f32_u32_e32 v1, v7 489; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v6 490; GISEL-NEXT: v_sub_i32_e32 v15, vcc, v15, v11 491; GISEL-NEXT: v_sub_i32_e64 v11, s[4:5], v16, v12 492; GISEL-NEXT: v_mac_f32_e32 v1, 0x4f800000, v13 493; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v1 494; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], v16, v12, vcc 495; GISEL-NEXT: v_subb_u32_e32 v13, vcc, v11, v4, vcc 496; GISEL-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 497; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v1 498; GISEL-NEXT: v_trunc_f32_e32 v16, v11 499; GISEL-NEXT: v_mac_f32_e32 v1, 0xcf800000, v16 500; GISEL-NEXT: v_cvt_u32_f32_e32 v18, v1 501; GISEL-NEXT: v_sub_i32_e32 v19, vcc, 0, v7 502; GISEL-NEXT: v_subb_u32_e32 v20, vcc, 0, v6, vcc 503; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[6:7], v19, v18, 0 504; GISEL-NEXT: v_cvt_u32_f32_e32 v16, v16 505; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v15, v10 506; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v15, v10 507; GISEL-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v13, vcc 508; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10 509; GISEL-NEXT: v_mov_b32_e32 v1, v12 510; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[6:7], v19, v16, v[1:2] 511; GISEL-NEXT: v_mul_lo_u32 v1, v16, v11 512; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v15, v4 513; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[6:7], v20, v18, v[12:13] 514; GISEL-NEXT: 
v_cndmask_b32_e64 v21, 0, -1, s[4:5] 515; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v17, v4 516; GISEL-NEXT: v_mul_lo_u32 v10, v18, v12 517; GISEL-NEXT: v_add_i32_e64 v1, s[6:7], v1, v10 518; GISEL-NEXT: v_mul_hi_u32 v10, v18, v11 519; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[6:7] 520; GISEL-NEXT: v_mul_hi_u32 v11, v16, v11 521; GISEL-NEXT: v_add_i32_e64 v1, s[6:7], v1, v10 522; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[8:9] 523; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc 524; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v15, v4 525; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v17, v4 526; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[8:9] 527; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc 528; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v0 529; GISEL-NEXT: v_cndmask_b32_e64 v4, v15, v21, s[4:5] 530; GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v14, vcc 531; GISEL-NEXT: v_add_i32_e32 v17, vcc, 1, v10 532; GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v15, vcc 533; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 534; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v17, vcc 535; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[6:7] 536; GISEL-NEXT: v_cndmask_b32_e32 v15, v15, v21, vcc 537; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 538; GISEL-NEXT: v_mul_lo_u32 v13, v16, v12 539; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11 540; GISEL-NEXT: v_mul_hi_u32 v13, v18, v12 541; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 542; GISEL-NEXT: v_mul_hi_u32 v12, v16, v12 543; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 544; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 545; GISEL-NEXT: v_add_i32_e32 v13, vcc, v17, v13 546; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 547; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 548; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11 549; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 550; GISEL-NEXT: v_add_i32_e32 v12, vcc, v18, v10 551; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v16, v11, vcc 552; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v19, v12, 0 553; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 554; 
GISEL-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc 555; GISEL-NEXT: v_mov_b32_e32 v0, v11 556; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v19, v13, v[0:1] 557; GISEL-NEXT: v_ashrrev_i32_e32 v11, 31, v3 558; GISEL-NEXT: v_cndmask_b32_e32 v14, v14, v15, vcc 559; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v20, v12, v[0:1] 560; GISEL-NEXT: v_add_i32_e32 v1, vcc, v2, v11 561; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v3, v11, vcc 562; GISEL-NEXT: v_xor_b32_e32 v15, v1, v11 563; GISEL-NEXT: v_mul_lo_u32 v1, v13, v10 564; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0 565; GISEL-NEXT: v_xor_b32_e32 v16, v2, v11 566; GISEL-NEXT: v_mul_hi_u32 v2, v12, v10 567; GISEL-NEXT: v_xor_b32_e32 v9, v4, v8 568; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3 569; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 570; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v2 571; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 572; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0 573; GISEL-NEXT: v_mul_hi_u32 v4, v13, v10 574; GISEL-NEXT: v_add_i32_e32 v1, vcc, v3, v1 575; GISEL-NEXT: v_mul_hi_u32 v3, v12, v0 576; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4 577; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 578; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 579; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 580; GISEL-NEXT: v_add_i32_e32 v3, vcc, v4, v3 581; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 582; GISEL-NEXT: v_add_i32_e32 v1, vcc, v2, v1 583; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 584; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 585; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 586; GISEL-NEXT: v_add_i32_e32 v1, vcc, v12, v1 587; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v13, v0, vcc 588; GISEL-NEXT: v_mul_lo_u32 v2, v16, v1 589; GISEL-NEXT: v_mul_lo_u32 v3, v15, v0 590; GISEL-NEXT: v_mul_hi_u32 v4, v15, v1 591; GISEL-NEXT: v_mul_hi_u32 v1, v16, v1 592; GISEL-NEXT: v_xor_b32_e32 v10, v14, v8 593; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 594; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 595; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4 596; GISEL-NEXT: 
v_cndmask_b32_e64 v2, 0, 1, vcc 597; GISEL-NEXT: v_mul_lo_u32 v4, v16, v0 598; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 599; GISEL-NEXT: v_mul_hi_u32 v3, v15, v0 600; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1 601; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 602; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3 603; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 604; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v3 605; GISEL-NEXT: v_add_i32_e32 v12, vcc, v1, v2 606; GISEL-NEXT: v_mul_hi_u32 v0, v16, v0 607; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v12, 0 608; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 609; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1 610; GISEL-NEXT: v_add_i32_e32 v13, vcc, v0, v1 611; GISEL-NEXT: v_mov_b32_e32 v0, v3 612; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v7, v13, v[0:1] 613; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v9, v8 614; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v12, v[3:4] 615; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v10, v8, vcc 616; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v15, v2 617; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v16, v3, vcc 618; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v16, v3 619; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v6 620; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc 621; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] 622; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v7 623; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 624; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] 625; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v6 626; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 627; GISEL-NEXT: v_cndmask_b32_e64 v4, v8, v9, s[4:5] 628; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v12 629; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v13, vcc 630; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6 631; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc 632; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v7 633; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc 634; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v6 635; GISEL-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc 636; GISEL-NEXT: v_add_i32_e32 
v3, vcc, 1, v8 637; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v9, vcc 638; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 639; GISEL-NEXT: v_cndmask_b32_e32 v2, v8, v3, vcc 640; GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v6, vcc 641; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 642; GISEL-NEXT: v_cndmask_b32_e32 v2, v12, v2, vcc 643; GISEL-NEXT: v_xor_b32_e32 v4, v11, v5 644; GISEL-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc 645; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 646; GISEL-NEXT: v_xor_b32_e32 v3, v3, v4 647; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 648; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v4, vcc 649; GISEL-NEXT: s_setpc_b64 s[30:31] 650; 651; CGP-LABEL: v_sdiv_v2i64: 652; CGP: ; %bb.0: 653; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 654; CGP-NEXT: v_mov_b32_e32 v11, v1 655; CGP-NEXT: v_mov_b32_e32 v10, v0 656; CGP-NEXT: v_or_b32_e32 v1, v11, v5 657; CGP-NEXT: v_mov_b32_e32 v0, 0 658; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] 659; CGP-NEXT: v_mov_b32_e32 v8, v2 660; CGP-NEXT: v_mov_b32_e32 v9, v3 661; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 662; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc 663; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 664; CGP-NEXT: s_cbranch_execz .LBB2_2 665; CGP-NEXT: ; %bb.1: 666; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v5 667; CGP-NEXT: v_add_i32_e32 v1, vcc, v4, v0 668; CGP-NEXT: v_addc_u32_e32 v3, vcc, v5, v0, vcc 669; CGP-NEXT: v_xor_b32_e32 v2, v1, v0 670; CGP-NEXT: v_xor_b32_e32 v1, v3, v0 671; CGP-NEXT: v_cvt_f32_u32_e32 v3, v2 672; CGP-NEXT: v_cvt_f32_u32_e32 v4, v1 673; CGP-NEXT: v_sub_i32_e32 v13, vcc, 0, v2 674; CGP-NEXT: v_subb_u32_e32 v14, vcc, 0, v1, vcc 675; CGP-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4 676; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 677; CGP-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 678; CGP-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 679; CGP-NEXT: v_trunc_f32_e32 v5, v4 680; CGP-NEXT: v_mac_f32_e32 v3, 0xcf800000, v5 681; CGP-NEXT: v_cvt_u32_f32_e32 v12, v3 682; CGP-NEXT: v_cvt_u32_f32_e32 v15, v5 683; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 
v13, v12, 0 684; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v13, v15, v[4:5] 685; CGP-NEXT: v_mul_hi_u32 v16, v12, v3 686; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v14, v12, v[4:5] 687; CGP-NEXT: v_mul_lo_u32 v5, v15, v3 688; CGP-NEXT: v_mul_hi_u32 v3, v15, v3 689; CGP-NEXT: v_mul_lo_u32 v17, v12, v4 690; CGP-NEXT: v_mul_lo_u32 v18, v15, v4 691; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v17 692; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 693; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v16 694; CGP-NEXT: v_mul_hi_u32 v16, v12, v4 695; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 696; CGP-NEXT: v_add_i32_e32 v5, vcc, v17, v5 697; CGP-NEXT: v_add_i32_e32 v3, vcc, v18, v3 698; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 699; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v16 700; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 701; CGP-NEXT: v_add_i32_e32 v16, vcc, v17, v16 702; CGP-NEXT: v_mul_hi_u32 v4, v15, v4 703; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 704; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 705; CGP-NEXT: v_add_i32_e32 v5, vcc, v16, v5 706; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 707; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v3 708; CGP-NEXT: v_addc_u32_e32 v15, vcc, v15, v4, vcc 709; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v13, v12, 0 710; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v13, v15, v[4:5] 711; CGP-NEXT: v_ashrrev_i32_e32 v13, 31, v11 712; CGP-NEXT: v_mul_hi_u32 v16, v12, v3 713; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v14, v12, v[4:5] 714; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v13 715; CGP-NEXT: v_addc_u32_e32 v10, vcc, v11, v13, vcc 716; CGP-NEXT: v_xor_b32_e32 v11, v5, v13 717; CGP-NEXT: v_mul_lo_u32 v5, v15, v3 718; CGP-NEXT: v_mul_lo_u32 v14, v12, v4 719; CGP-NEXT: v_mul_hi_u32 v3, v15, v3 720; CGP-NEXT: v_xor_b32_e32 v10, v10, v13 721; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v14 722; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 723; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v16 724; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 725; CGP-NEXT: v_mul_lo_u32 v16, v15, v4 726; CGP-NEXT: 
v_add_i32_e32 v5, vcc, v14, v5 727; CGP-NEXT: v_mul_hi_u32 v14, v12, v4 728; CGP-NEXT: v_add_i32_e32 v3, vcc, v16, v3 729; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 730; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v14 731; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 732; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 733; CGP-NEXT: v_mul_hi_u32 v4, v15, v4 734; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 735; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 736; CGP-NEXT: v_add_i32_e32 v5, vcc, v14, v5 737; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5 738; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v3 739; CGP-NEXT: v_addc_u32_e32 v4, vcc, v15, v4, vcc 740; CGP-NEXT: v_mul_lo_u32 v5, v10, v3 741; CGP-NEXT: v_mul_lo_u32 v12, v11, v4 742; CGP-NEXT: v_mul_hi_u32 v14, v11, v3 743; CGP-NEXT: v_mul_hi_u32 v3, v10, v3 744; CGP-NEXT: v_mul_hi_u32 v15, v10, v4 745; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12 746; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 747; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v14 748; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 749; CGP-NEXT: v_mul_lo_u32 v14, v10, v4 750; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5 751; CGP-NEXT: v_mul_hi_u32 v12, v11, v4 752; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3 753; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 754; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v12 755; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 756; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 757; CGP-NEXT: v_add_i32_e32 v14, vcc, v3, v5 758; CGP-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v2, v14, 0 759; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 760; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5 761; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v5 762; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v12, v[4:5] 763; CGP-NEXT: v_sub_i32_e32 v3, vcc, v11, v3 764; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v14, v[4:5] 765; CGP-NEXT: v_subb_u32_e64 v5, s[4:5], v10, v4, vcc 766; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v10, v4 767; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v1 768; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v1, vcc 769; 
CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] 770; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v2 771; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v2 772; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] 773; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v1 774; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc 775; CGP-NEXT: v_cndmask_b32_e64 v5, v10, v11, s[4:5] 776; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v14 777; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v12, vcc 778; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1 779; CGP-NEXT: v_cndmask_b32_e64 v15, 0, -1, vcc 780; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2 781; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc 782; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 783; CGP-NEXT: v_cndmask_b32_e32 v1, v15, v2, vcc 784; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v10 785; CGP-NEXT: v_addc_u32_e32 v3, vcc, 0, v11, vcc 786; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 787; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v2, vcc 788; CGP-NEXT: v_cndmask_b32_e32 v2, v11, v3, vcc 789; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 790; CGP-NEXT: v_cndmask_b32_e32 v1, v14, v1, vcc 791; CGP-NEXT: v_xor_b32_e32 v3, v13, v0 792; CGP-NEXT: v_cndmask_b32_e32 v2, v12, v2, vcc 793; CGP-NEXT: v_xor_b32_e32 v0, v1, v3 794; CGP-NEXT: v_xor_b32_e32 v1, v2, v3 795; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 796; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc 797; CGP-NEXT: ; implicit-def: $vgpr4 798; CGP-NEXT: ; implicit-def: $vgpr10 799; CGP-NEXT: .LBB2_2: ; %Flow1 800; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] 801; CGP-NEXT: s_cbranch_execz .LBB2_4 802; CGP-NEXT: ; %bb.3: 803; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4 804; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v4 805; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 806; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 807; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 808; CGP-NEXT: v_mul_lo_u32 v1, v1, v0 809; CGP-NEXT: v_mul_hi_u32 v1, v0, v1 810; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 811; CGP-NEXT: v_mul_hi_u32 v0, v10, v0 812; CGP-NEXT: v_mul_lo_u32 v1, v0, v4 813; CGP-NEXT: v_add_i32_e32 v2, vcc, 
1, v0 814; CGP-NEXT: v_sub_i32_e32 v1, vcc, v10, v1 815; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4 816; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 817; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v4 818; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 819; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0 820; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4 821; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 822; CGP-NEXT: v_mov_b32_e32 v1, 0 823; CGP-NEXT: .LBB2_4: 824; CGP-NEXT: s_or_b64 exec, exec, s[6:7] 825; CGP-NEXT: v_or_b32_e32 v3, v9, v7 826; CGP-NEXT: v_mov_b32_e32 v2, 0 827; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] 828; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 829; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc 830; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 831; CGP-NEXT: s_cbranch_execnz .LBB2_7 832; CGP-NEXT: ; %bb.5: ; %Flow 833; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] 834; CGP-NEXT: s_cbranch_execnz .LBB2_8 835; CGP-NEXT: .LBB2_6: 836; CGP-NEXT: s_or_b64 exec, exec, s[6:7] 837; CGP-NEXT: s_setpc_b64 s[30:31] 838; CGP-NEXT: .LBB2_7: 839; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v7 840; CGP-NEXT: v_add_i32_e32 v3, vcc, v6, v2 841; CGP-NEXT: v_addc_u32_e32 v5, vcc, v7, v2, vcc 842; CGP-NEXT: v_xor_b32_e32 v4, v3, v2 843; CGP-NEXT: v_xor_b32_e32 v3, v5, v2 844; CGP-NEXT: v_cvt_f32_u32_e32 v5, v4 845; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3 846; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v4 847; CGP-NEXT: v_subb_u32_e32 v12, vcc, 0, v3, vcc 848; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 849; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 850; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 851; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 852; CGP-NEXT: v_trunc_f32_e32 v7, v6 853; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 854; CGP-NEXT: v_cvt_u32_f32_e32 v10, v5 855; CGP-NEXT: v_cvt_u32_f32_e32 v13, v7 856; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v10, 0 857; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v11, v13, v[6:7] 858; CGP-NEXT: v_mul_hi_u32 v14, v10, v5 859; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v12, v10, v[6:7] 
860; CGP-NEXT: v_mul_lo_u32 v7, v13, v5 861; CGP-NEXT: v_mul_hi_u32 v5, v13, v5 862; CGP-NEXT: v_mul_lo_u32 v15, v10, v6 863; CGP-NEXT: v_mul_lo_u32 v16, v13, v6 864; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v15 865; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 866; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v14 867; CGP-NEXT: v_mul_hi_u32 v14, v10, v6 868; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 869; CGP-NEXT: v_add_i32_e32 v7, vcc, v15, v7 870; CGP-NEXT: v_add_i32_e32 v5, vcc, v16, v5 871; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 872; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v14 873; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 874; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 875; CGP-NEXT: v_mul_hi_u32 v6, v13, v6 876; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 877; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 878; CGP-NEXT: v_add_i32_e32 v7, vcc, v14, v7 879; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 880; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v5 881; CGP-NEXT: v_addc_u32_e32 v13, vcc, v13, v6, vcc 882; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v10, 0 883; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v11, v13, v[6:7] 884; CGP-NEXT: v_ashrrev_i32_e32 v11, 31, v9 885; CGP-NEXT: v_mul_hi_u32 v14, v10, v5 886; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v12, v10, v[6:7] 887; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v11 888; CGP-NEXT: v_addc_u32_e32 v8, vcc, v9, v11, vcc 889; CGP-NEXT: v_xor_b32_e32 v9, v7, v11 890; CGP-NEXT: v_mul_lo_u32 v7, v13, v5 891; CGP-NEXT: v_mul_lo_u32 v12, v10, v6 892; CGP-NEXT: v_mul_hi_u32 v5, v13, v5 893; CGP-NEXT: v_xor_b32_e32 v8, v8, v11 894; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v12 895; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 896; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v14 897; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 898; CGP-NEXT: v_mul_lo_u32 v14, v13, v6 899; CGP-NEXT: v_add_i32_e32 v7, vcc, v12, v7 900; CGP-NEXT: v_mul_hi_u32 v12, v10, v6 901; CGP-NEXT: v_add_i32_e32 v5, vcc, v14, v5 902; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 903; CGP-NEXT: 
v_add_i32_e32 v5, vcc, v5, v12 904; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 905; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 906; CGP-NEXT: v_mul_hi_u32 v6, v13, v6 907; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 908; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 909; CGP-NEXT: v_add_i32_e32 v7, vcc, v12, v7 910; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 911; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 912; CGP-NEXT: v_addc_u32_e32 v6, vcc, v13, v6, vcc 913; CGP-NEXT: v_mul_lo_u32 v7, v8, v5 914; CGP-NEXT: v_mul_lo_u32 v10, v9, v6 915; CGP-NEXT: v_mul_hi_u32 v12, v9, v5 916; CGP-NEXT: v_mul_hi_u32 v5, v8, v5 917; CGP-NEXT: v_mul_hi_u32 v13, v8, v6 918; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 919; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 920; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v12 921; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 922; CGP-NEXT: v_mul_lo_u32 v12, v8, v6 923; CGP-NEXT: v_add_i32_e32 v7, vcc, v10, v7 924; CGP-NEXT: v_mul_hi_u32 v10, v9, v6 925; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5 926; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 927; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v10 928; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 929; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 930; CGP-NEXT: v_add_i32_e32 v12, vcc, v5, v7 931; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v12, 0 932; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 933; CGP-NEXT: v_add_i32_e32 v7, vcc, v10, v7 934; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v7 935; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v4, v10, v[6:7] 936; CGP-NEXT: v_sub_i32_e32 v5, vcc, v9, v5 937; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v3, v12, v[6:7] 938; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v8, v6, vcc 939; CGP-NEXT: v_sub_i32_e64 v6, s[4:5], v8, v6 940; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v3 941; CGP-NEXT: v_subb_u32_e32 v6, vcc, v6, v3, vcc 942; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] 943; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v4 944; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v4 945; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] 
946; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v3 947; CGP-NEXT: v_subbrev_u32_e32 v6, vcc, 0, v6, vcc 948; CGP-NEXT: v_cndmask_b32_e64 v7, v8, v9, s[4:5] 949; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v12 950; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v10, vcc 951; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v6, v3 952; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc 953; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v4 954; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc 955; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v6, v3 956; CGP-NEXT: v_cndmask_b32_e32 v3, v13, v4, vcc 957; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v8 958; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v9, vcc 959; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 960; CGP-NEXT: v_cndmask_b32_e32 v3, v8, v4, vcc 961; CGP-NEXT: v_cndmask_b32_e32 v4, v9, v5, vcc 962; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 963; CGP-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc 964; CGP-NEXT: v_xor_b32_e32 v5, v11, v2 965; CGP-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc 966; CGP-NEXT: v_xor_b32_e32 v2, v3, v5 967; CGP-NEXT: v_xor_b32_e32 v3, v4, v5 968; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 969; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v5, vcc 970; CGP-NEXT: ; implicit-def: $vgpr6 971; CGP-NEXT: ; implicit-def: $vgpr8 972; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] 973; CGP-NEXT: s_cbranch_execz .LBB2_6 974; CGP-NEXT: .LBB2_8: 975; CGP-NEXT: v_cvt_f32_u32_e32 v2, v6 976; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v6 977; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2 978; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 979; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2 980; CGP-NEXT: v_mul_lo_u32 v3, v3, v2 981; CGP-NEXT: v_mul_hi_u32 v3, v2, v3 982; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 983; CGP-NEXT: v_mul_hi_u32 v2, v8, v2 984; CGP-NEXT: v_mul_lo_u32 v3, v2, v6 985; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2 986; CGP-NEXT: v_sub_i32_e32 v3, vcc, v8, v3 987; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6 988; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 989; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v6 990; CGP-NEXT: v_cndmask_b32_e32 v3, v3, 
v4, vcc 991; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2 992; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6 993; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 994; CGP-NEXT: v_mov_b32_e32 v3, 0 995; CGP-NEXT: s_or_b64 exec, exec, s[6:7] 996; CGP-NEXT: s_setpc_b64 s[30:31] 997 %result = sdiv <2 x i64> %num, %den 998 ret <2 x i64> %result 999} 1000 1001define i64 @v_sdiv_i64_pow2k_denom(i64 %num) { 1002; CHECK-LABEL: v_sdiv_i64_pow2k_denom: 1003; CHECK: ; %bb.0: 1004; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1005; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v1 1006; CHECK-NEXT: v_lshrrev_b32_e32 v2, 20, v2 1007; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 1008; CHECK-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1009; CHECK-NEXT: v_ashr_i64 v[0:1], v[0:1], 12 1010; CHECK-NEXT: s_setpc_b64 s[30:31] 1011 %result = sdiv i64 %num, 4096 1012 ret i64 %result 1013} 1014 1015define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) { 1016; GISEL-LABEL: v_sdiv_v2i64_pow2k_denom: 1017; GISEL: ; %bb.0: 1018; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1019; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1 1020; GISEL-NEXT: v_lshrrev_b32_e32 v4, 20, v4 1021; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v3 1022; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 1023; GISEL-NEXT: v_lshrrev_b32_e32 v5, 20, v5 1024; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1025; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v5 1026; GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 1027; GISEL-NEXT: v_ashr_i64 v[0:1], v[0:1], 12 1028; GISEL-NEXT: v_ashr_i64 v[2:3], v[2:3], 12 1029; GISEL-NEXT: s_setpc_b64 s[30:31] 1030; 1031; CGP-LABEL: v_sdiv_v2i64_pow2k_denom: 1032; CGP: ; %bb.0: 1033; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1034; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 1035; CGP-NEXT: v_lshrrev_b32_e32 v4, 20, v4 1036; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 1037; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v3 1038; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1039; CGP-NEXT: v_lshrrev_b32_e32 v4, 20, v4 1040; CGP-NEXT: 
v_add_i32_e32 v2, vcc, v2, v4 1041; CGP-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 1042; CGP-NEXT: v_ashr_i64 v[0:1], v[0:1], 12 1043; CGP-NEXT: v_ashr_i64 v[2:3], v[2:3], 12 1044; CGP-NEXT: s_setpc_b64 s[30:31] 1045 %result = sdiv <2 x i64> %num, <i64 4096, i64 4096> 1046 ret <2 x i64> %result 1047} 1048 1049define i64 @v_sdiv_i64_oddk_denom(i64 %num) { 1050; CHECK-LABEL: v_sdiv_i64_oddk_denom: 1051; CHECK: ; %bb.0: 1052; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1053; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb 1054; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 1055; CHECK-NEXT: v_mov_b32_e32 v6, 0xffed2705 1056; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 1057; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 1058; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 1059; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 1060; CHECK-NEXT: v_trunc_f32_e32 v4, v3 1061; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4 1062; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v2 1063; CHECK-NEXT: v_cvt_u32_f32_e32 v7, v4 1064; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v5, 0 1065; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v7, v[3:4] 1066; CHECK-NEXT: v_mul_hi_u32 v8, v5, v2 1067; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v5, v[3:4] 1068; CHECK-NEXT: v_mul_lo_u32 v4, v7, v2 1069; CHECK-NEXT: v_mul_hi_u32 v2, v7, v2 1070; CHECK-NEXT: v_mul_lo_u32 v9, v5, v3 1071; CHECK-NEXT: v_mul_lo_u32 v10, v7, v3 1072; CHECK-NEXT: v_mul_hi_u32 v11, v5, v3 1073; CHECK-NEXT: v_mul_hi_u32 v3, v7, v3 1074; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v9 1075; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1076; CHECK-NEXT: v_add_i32_e32 v2, vcc, v10, v2 1077; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1078; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v8 1079; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 1080; CHECK-NEXT: v_add_i32_e32 v4, vcc, v9, v4 1081; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v11 1082; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1083; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 1084; CHECK-NEXT: v_add_i32_e32 
v2, vcc, v2, v4 1085; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 1086; CHECK-NEXT: v_add_i32_e32 v4, vcc, v8, v4 1087; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 1088; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v2 1089; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v3, vcc 1090; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v5, 0 1091; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v7, v[3:4] 1092; CHECK-NEXT: v_ashrrev_i32_e32 v6, 31, v1 1093; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v6 1094; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v5, v[3:4] 1095; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v6, vcc 1096; CHECK-NEXT: v_xor_b32_e32 v4, v0, v6 1097; CHECK-NEXT: v_mul_lo_u32 v0, v7, v2 1098; CHECK-NEXT: v_mul_lo_u32 v8, v5, v3 1099; CHECK-NEXT: v_xor_b32_e32 v9, v1, v6 1100; CHECK-NEXT: v_mul_hi_u32 v1, v5, v2 1101; CHECK-NEXT: v_mul_hi_u32 v2, v7, v2 1102; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 1103; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1104; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 1105; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1106; CHECK-NEXT: v_mul_lo_u32 v1, v7, v3 1107; CHECK-NEXT: v_add_i32_e32 v0, vcc, v8, v0 1108; CHECK-NEXT: v_mul_hi_u32 v8, v5, v3 1109; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 1110; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1111; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8 1112; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1113; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 1114; CHECK-NEXT: v_mul_hi_u32 v3, v7, v3 1115; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0 1116; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1117; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1 1118; CHECK-NEXT: v_add_i32_e32 v1, vcc, v3, v1 1119; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0 1120; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v7, v1, vcc 1121; CHECK-NEXT: v_mul_lo_u32 v2, v9, v0 1122; CHECK-NEXT: v_mul_lo_u32 v3, v4, v1 1123; CHECK-NEXT: v_mul_hi_u32 v7, v4, v0 1124; CHECK-NEXT: v_mul_hi_u32 v0, v9, v0 1125; CHECK-NEXT: v_mov_b32_e32 v5, 0x12d8fb 1126; CHECK-NEXT: 
v_add_i32_e32 v2, vcc, v2, v3 1127; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 1128; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 1129; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1130; CHECK-NEXT: v_mul_lo_u32 v7, v9, v1 1131; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 1132; CHECK-NEXT: v_mul_hi_u32 v3, v4, v1 1133; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 1134; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1135; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3 1136; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 1137; CHECK-NEXT: v_add_i32_e32 v3, vcc, v7, v3 1138; CHECK-NEXT: v_add_i32_e32 v7, vcc, v0, v2 1139; CHECK-NEXT: v_mul_hi_u32 v8, v9, v1 1140; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v7, 0 1141; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1142; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 1143; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v2 1144; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v3, v[1:2] 1145; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0 1146; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v9, v1, vcc 1147; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v9, v1 1148; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 1149; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 1150; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 1151; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] 1152; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2 1153; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 1154; CHECK-NEXT: v_cndmask_b32_e64 v2, -1, v4, s[4:5] 1155; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v7 1156; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v3, vcc 1157; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 1158; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc 1159; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 1160; CHECK-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc 1161; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v4 1162; CHECK-NEXT: v_addc_u32_e32 v5, vcc, 0, v8, vcc 1163; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 1164; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 1165; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v5, vcc 1166; CHECK-NEXT: 
v_cmp_ne_u32_e32 vcc, 0, v2 1167; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc 1168; CHECK-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 1169; CHECK-NEXT: v_xor_b32_e32 v0, v0, v6 1170; CHECK-NEXT: v_xor_b32_e32 v1, v1, v6 1171; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 1172; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc 1173; CHECK-NEXT: s_setpc_b64 s[30:31] 1174 %result = sdiv i64 %num, 1235195 1175 ret i64 %result 1176} 1177 1178define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { 1179; GISEL-LABEL: v_sdiv_v2i64_oddk_denom: 1180; GISEL: ; %bb.0: 1181; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1182; GISEL-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb 1183; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 1184; GISEL-NEXT: v_mov_b32_e32 v6, 0xffed2705 1185; GISEL-NEXT: s_mov_b32 s6, 1 1186; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 1187; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 1188; GISEL-NEXT: s_cmp_lg_u32 s6, 0 1189; GISEL-NEXT: s_subb_u32 s6, 0, 0 1190; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 1191; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 1192; GISEL-NEXT: v_trunc_f32_e32 v8, v5 1193; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v8 1194; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v4 1195; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 1196; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v7, 0 1197; GISEL-NEXT: v_mov_b32_e32 v9, v5 1198; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v6, v8, v[9:10] 1199; GISEL-NEXT: v_mul_hi_u32 v11, v7, v4 1200; GISEL-NEXT: v_mul_hi_u32 v12, v8, v4 1201; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v7, v[9:10] 1202; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4 1203; GISEL-NEXT: v_mul_lo_u32 v13, v7, v9 1204; GISEL-NEXT: v_mul_lo_u32 v4, v8, v9 1205; GISEL-NEXT: v_add_i32_e32 v13, vcc, v10, v13 1206; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 1207; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v11 1208; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1209; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 1210; GISEL-NEXT: v_mul_hi_u32 v14, v7, v9 1211; GISEL-NEXT: 
v_add_i32_e32 v4, vcc, v4, v12 1212; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1213; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14 1214; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 1215; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 1216; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9 1217; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v13 1218; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1219; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 1220; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 1221; GISEL-NEXT: v_add_i32_e32 v16, vcc, v7, v4 1222; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0 1223; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v8, v9, vcc 1224; GISEL-NEXT: v_mov_b32_e32 v4, v14 1225; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5] 1226; GISEL-NEXT: v_mul_lo_u32 v4, v17, v13 1227; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], s6, v16, v[14:15] 1228; GISEL-NEXT: s_mov_b32 s6, 1 1229; GISEL-NEXT: s_cmp_lg_u32 s6, 0 1230; GISEL-NEXT: v_mul_lo_u32 v9, v16, v14 1231; GISEL-NEXT: s_subb_u32 s6, 0, 0 1232; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 1233; GISEL-NEXT: v_mul_hi_u32 v9, v16, v13 1234; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1235; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 1236; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 1237; GISEL-NEXT: v_mul_hi_u32 v9, v17, v13 1238; GISEL-NEXT: v_mul_lo_u32 v13, v17, v14 1239; GISEL-NEXT: v_add_i32_e32 v4, vcc, v15, v4 1240; GISEL-NEXT: v_mul_hi_u32 v15, v16, v14 1241; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9 1242; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1243; GISEL-NEXT: v_add_i32_e32 v15, vcc, v9, v15 1244; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1245; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v9 1246; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v1 1247; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 1248; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v9, vcc 1249; GISEL-NEXT: v_xor_b32_e32 v18, v0, v9 1250; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v4 1251; GISEL-NEXT: v_mul_hi_u32 v4, v17, v14 1252; GISEL-NEXT: 
v_xor_b32_e32 v19, v1, v9 1253; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1254; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 1255; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1 1256; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0 1257; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc 1258; GISEL-NEXT: v_mul_lo_u32 v13, v19, v0 1259; GISEL-NEXT: v_mul_lo_u32 v14, v18, v1 1260; GISEL-NEXT: v_mul_hi_u32 v15, v18, v0 1261; GISEL-NEXT: v_mul_hi_u32 v0, v19, v0 1262; GISEL-NEXT: v_mov_b32_e32 v4, 0x12d8fb 1263; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 1264; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 1265; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 1266; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1267; GISEL-NEXT: v_mul_lo_u32 v15, v19, v1 1268; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 1269; GISEL-NEXT: v_mul_hi_u32 v14, v18, v1 1270; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0 1271; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1272; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v14 1273; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 1274; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 1275; GISEL-NEXT: v_add_i32_e32 v15, vcc, v0, v13 1276; GISEL-NEXT: v_mul_hi_u32 v16, v19, v1 1277; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v15, 0 1278; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1279; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 1280; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v13 1281; GISEL-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v16, v[1:2] 1282; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v18, v0 1283; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v19, v13, vcc 1284; GISEL-NEXT: v_sub_i32_e64 v13, s[4:5], v19, v13 1285; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 1286; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] 1287; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1 1288; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v13, vcc 1289; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 1290; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 1291; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v15 1292; 
GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v16, vcc 1293; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 1294; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc 1295; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 1296; GISEL-NEXT: v_cndmask_b32_e32 v18, -1, v0, vcc 1297; GISEL-NEXT: v_mov_b32_e32 v0, v5 1298; GISEL-NEXT: v_cndmask_b32_e64 v14, -1, v14, s[4:5] 1299; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1] 1300; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v7, v[0:1] 1301; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v13 1302; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v17, vcc 1303; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 1304; GISEL-NEXT: v_mul_lo_u32 v18, v7, v0 1305; GISEL-NEXT: v_cndmask_b32_e32 v13, v13, v1, vcc 1306; GISEL-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc 1307; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v18 1308; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1309; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11 1310; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1311; GISEL-NEXT: v_mul_lo_u32 v11, v8, v0 1312; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 1313; GISEL-NEXT: v_mul_hi_u32 v10, v7, v0 1314; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 1315; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1316; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 1317; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1318; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 1319; GISEL-NEXT: v_mul_hi_u32 v0, v8, v0 1320; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 1321; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1322; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 1323; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 1324; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v1 1325; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v0, vcc 1326; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v7, 0 1327; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 1328; GISEL-NEXT: v_cndmask_b32_e32 v11, v16, v5, vcc 1329; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v8, v[1:2] 1330; GISEL-NEXT: v_xor_b32_e32 v1, v11, v9 1331; GISEL-NEXT: 
v_ashrrev_i32_e32 v11, 31, v3 1332; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v7, v[5:6] 1333; GISEL-NEXT: v_cndmask_b32_e32 v10, v15, v13, vcc 1334; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11 1335; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v11, vcc 1336; GISEL-NEXT: v_xor_b32_e32 v12, v2, v11 1337; GISEL-NEXT: v_mul_lo_u32 v2, v8, v0 1338; GISEL-NEXT: v_mul_lo_u32 v6, v7, v5 1339; GISEL-NEXT: v_xor_b32_e32 v13, v3, v11 1340; GISEL-NEXT: v_mul_hi_u32 v3, v7, v0 1341; GISEL-NEXT: v_mul_hi_u32 v0, v8, v0 1342; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6 1343; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1344; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 1345; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1346; GISEL-NEXT: v_mul_lo_u32 v3, v8, v5 1347; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2 1348; GISEL-NEXT: v_mul_hi_u32 v6, v7, v5 1349; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 1350; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 1351; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 1352; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1353; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 1354; GISEL-NEXT: v_mul_hi_u32 v5, v8, v5 1355; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 1356; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1357; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 1358; GISEL-NEXT: v_add_i32_e32 v2, vcc, v5, v2 1359; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v0 1360; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v8, v2, vcc 1361; GISEL-NEXT: v_mul_lo_u32 v5, v13, v3 1362; GISEL-NEXT: v_mul_lo_u32 v6, v12, v2 1363; GISEL-NEXT: v_xor_b32_e32 v10, v10, v9 1364; GISEL-NEXT: v_mul_hi_u32 v7, v12, v3 1365; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v9 1366; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc 1367; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 1368; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1369; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 1370; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1371; GISEL-NEXT: v_mul_lo_u32 v7, v13, v2 1372; GISEL-NEXT: v_mul_hi_u32 v3, v13, v3 1373; GISEL-NEXT: 
v_add_i32_e32 v5, vcc, v6, v5 1374; GISEL-NEXT: v_mul_hi_u32 v6, v12, v2 1375; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v3 1376; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1377; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 1378; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1379; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 1380; GISEL-NEXT: v_add_i32_e32 v7, vcc, v3, v5 1381; GISEL-NEXT: v_mul_hi_u32 v8, v13, v2 1382; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v7, 0 1383; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1384; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 1385; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v5 1386; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v8, v[3:4] 1387; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v12, v2 1388; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc 1389; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5 1390; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc 1391; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4 1392; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 1393; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] 1394; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 1395; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc 1396; GISEL-NEXT: v_cndmask_b32_e64 v3, -1, v6, s[4:5] 1397; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v7 1398; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v8, vcc 1399; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 1400; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc 1401; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 1402; GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc 1403; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v6 1404; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v9, vcc 1405; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 1406; GISEL-NEXT: v_cndmask_b32_e32 v2, v6, v4, vcc 1407; GISEL-NEXT: v_cndmask_b32_e32 v4, v9, v5, vcc 1408; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 1409; GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc 1410; GISEL-NEXT: v_cndmask_b32_e32 v3, v8, v4, vcc 1411; GISEL-NEXT: v_xor_b32_e32 v2, v2, v11 1412; GISEL-NEXT: v_xor_b32_e32 v3, v3, v11 1413; 
GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v11 1414; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v11, vcc 1415; GISEL-NEXT: s_setpc_b64 s[30:31] 1416; 1417; CGP-LABEL: v_sdiv_v2i64_oddk_denom: 1418; CGP: ; %bb.0: 1419; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1420; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb 1421; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 1422; CGP-NEXT: v_mov_b32_e32 v6, 0xffed2705 1423; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 1424; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 1425; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 1426; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 1427; CGP-NEXT: v_trunc_f32_e32 v8, v5 1428; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v8 1429; CGP-NEXT: v_cvt_u32_f32_e32 v7, v4 1430; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 1431; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v7, 0 1432; CGP-NEXT: v_mov_b32_e32 v9, v5 1433; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v6, v8, v[9:10] 1434; CGP-NEXT: v_mul_hi_u32 v11, v7, v4 1435; CGP-NEXT: v_mul_hi_u32 v12, v8, v4 1436; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], -1, v7, v[9:10] 1437; CGP-NEXT: v_mul_lo_u32 v10, v8, v4 1438; CGP-NEXT: v_mul_lo_u32 v4, v7, v9 1439; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 1440; CGP-NEXT: v_mul_hi_u32 v14, v7, v9 1441; CGP-NEXT: v_mul_hi_u32 v9, v8, v9 1442; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 1443; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1444; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 1445; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 1446; CGP-NEXT: v_add_i32_e32 v4, vcc, v15, v4 1447; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v12 1448; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1449; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 1450; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 1451; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 1452; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 1453; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1454; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 1455; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v13 1456; CGP-NEXT: v_add_i32_e32 v16, vcc, v7, v4 1457; CGP-NEXT: 
v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0 1458; CGP-NEXT: v_addc_u32_e32 v17, vcc, v8, v9, vcc 1459; CGP-NEXT: v_mov_b32_e32 v4, v14 1460; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5] 1461; CGP-NEXT: v_mul_lo_u32 v4, v17, v13 1462; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], -1, v16, v[14:15] 1463; CGP-NEXT: v_mul_lo_u32 v9, v16, v14 1464; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 1465; CGP-NEXT: v_mul_hi_u32 v9, v16, v13 1466; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1467; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 1468; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 1469; CGP-NEXT: v_mul_hi_u32 v9, v17, v13 1470; CGP-NEXT: v_mul_lo_u32 v13, v17, v14 1471; CGP-NEXT: v_add_i32_e32 v4, vcc, v15, v4 1472; CGP-NEXT: v_mul_hi_u32 v15, v16, v14 1473; CGP-NEXT: v_add_i32_e32 v9, vcc, v13, v9 1474; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1475; CGP-NEXT: v_add_i32_e32 v15, vcc, v9, v15 1476; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1477; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v9 1478; CGP-NEXT: v_ashrrev_i32_e32 v9, 31, v1 1479; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v9 1480; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v9, vcc 1481; CGP-NEXT: v_xor_b32_e32 v18, v0, v9 1482; CGP-NEXT: v_add_i32_e32 v0, vcc, v15, v4 1483; CGP-NEXT: v_mul_hi_u32 v4, v17, v14 1484; CGP-NEXT: v_xor_b32_e32 v19, v1, v9 1485; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1486; CGP-NEXT: v_add_i32_e32 v1, vcc, v13, v1 1487; CGP-NEXT: v_add_i32_e32 v1, vcc, v4, v1 1488; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0 1489; CGP-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc 1490; CGP-NEXT: v_mul_lo_u32 v13, v19, v0 1491; CGP-NEXT: v_mul_lo_u32 v14, v18, v1 1492; CGP-NEXT: v_mul_hi_u32 v15, v18, v0 1493; CGP-NEXT: v_mul_hi_u32 v0, v19, v0 1494; CGP-NEXT: v_mov_b32_e32 v4, 0x12d8fb 1495; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 1496; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 1497; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 1498; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1499; CGP-NEXT: v_mul_lo_u32 v15, v19, v1 
1500; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 1501; CGP-NEXT: v_mul_hi_u32 v14, v18, v1 1502; CGP-NEXT: v_add_i32_e32 v0, vcc, v15, v0 1503; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1504; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 1505; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 1506; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 1507; CGP-NEXT: v_add_i32_e32 v15, vcc, v0, v13 1508; CGP-NEXT: v_mul_hi_u32 v16, v19, v1 1509; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v15, 0 1510; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1511; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 1512; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v13 1513; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v16, v[1:2] 1514; CGP-NEXT: v_sub_i32_e32 v0, vcc, v18, v0 1515; CGP-NEXT: v_subb_u32_e64 v1, s[4:5], v19, v13, vcc 1516; CGP-NEXT: v_sub_i32_e64 v13, s[4:5], v19, v13 1517; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 1518; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] 1519; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1 1520; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v13, vcc 1521; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 1522; CGP-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v1, vcc 1523; CGP-NEXT: v_add_i32_e32 v17, vcc, 1, v15 1524; CGP-NEXT: v_addc_u32_e32 v18, vcc, 0, v16, vcc 1525; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 1526; CGP-NEXT: v_mov_b32_e32 v0, v5 1527; CGP-NEXT: v_cndmask_b32_e64 v14, -1, v14, s[4:5] 1528; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, v[0:1] 1529; CGP-NEXT: v_cndmask_b32_e64 v19, 0, -1, vcc 1530; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v13 1531; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], -1, v7, v[0:1] 1532; CGP-NEXT: v_cndmask_b32_e32 v5, -1, v19, vcc 1533; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v17 1534; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v18, vcc 1535; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 1536; CGP-NEXT: v_mul_lo_u32 v5, v7, v0 1537; CGP-NEXT: v_cndmask_b32_e32 v17, v17, v1, vcc 1538; CGP-NEXT: v_cndmask_b32_e32 v13, v18, v13, vcc 1539; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v5 1540; 
CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1541; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11 1542; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1543; CGP-NEXT: v_mul_lo_u32 v10, v8, v0 1544; CGP-NEXT: v_add_i32_e32 v1, vcc, v5, v1 1545; CGP-NEXT: v_mul_hi_u32 v5, v7, v0 1546; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 1547; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1548; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 1549; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1550; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 1551; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 1552; CGP-NEXT: v_add_i32_e32 v1, vcc, v5, v1 1553; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1554; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 1555; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v5 1556; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v1 1557; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v0, vcc 1558; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v7, 0 1559; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 1560; CGP-NEXT: v_cndmask_b32_e32 v5, v15, v17, vcc 1561; CGP-NEXT: v_xor_b32_e32 v11, v5, v9 1562; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v8, v[1:2] 1563; CGP-NEXT: v_cndmask_b32_e32 v10, v16, v13, vcc 1564; CGP-NEXT: v_xor_b32_e32 v1, v10, v9 1565; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], -1, v7, v[5:6] 1566; CGP-NEXT: v_ashrrev_i32_e32 v10, 31, v3 1567; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v10 1568; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc 1569; CGP-NEXT: v_xor_b32_e32 v12, v2, v10 1570; CGP-NEXT: v_mul_lo_u32 v2, v8, v0 1571; CGP-NEXT: v_mul_lo_u32 v6, v7, v5 1572; CGP-NEXT: v_xor_b32_e32 v13, v3, v10 1573; CGP-NEXT: v_mul_hi_u32 v3, v7, v0 1574; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 1575; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 1576; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1577; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 1578; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1579; CGP-NEXT: v_mul_lo_u32 v3, v8, v5 1580; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v2 1581; CGP-NEXT: v_mul_hi_u32 v6, v7, v5 1582; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0 
1583; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 1584; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6 1585; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1586; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6 1587; CGP-NEXT: v_mul_hi_u32 v5, v8, v5 1588; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 1589; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1590; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 1591; CGP-NEXT: v_add_i32_e32 v2, vcc, v5, v2 1592; CGP-NEXT: v_add_i32_e32 v3, vcc, v7, v0 1593; CGP-NEXT: v_addc_u32_e32 v2, vcc, v8, v2, vcc 1594; CGP-NEXT: v_mul_lo_u32 v5, v13, v3 1595; CGP-NEXT: v_mul_lo_u32 v6, v12, v2 1596; CGP-NEXT: v_mul_hi_u32 v7, v12, v3 1597; CGP-NEXT: v_sub_i32_e32 v0, vcc, v11, v9 1598; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc 1599; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 1600; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1601; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 1602; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1603; CGP-NEXT: v_mul_lo_u32 v7, v13, v2 1604; CGP-NEXT: v_mul_hi_u32 v3, v13, v3 1605; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 1606; CGP-NEXT: v_mul_hi_u32 v6, v12, v2 1607; CGP-NEXT: v_add_i32_e32 v3, vcc, v7, v3 1608; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1609; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6 1610; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1611; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 1612; CGP-NEXT: v_add_i32_e32 v7, vcc, v3, v5 1613; CGP-NEXT: v_mul_hi_u32 v8, v13, v2 1614; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v7, 0 1615; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1616; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 1617; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v5 1618; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v8, v[3:4] 1619; CGP-NEXT: v_sub_i32_e32 v2, vcc, v12, v2 1620; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc 1621; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5 1622; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc 1623; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4 1624; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 1625; CGP-NEXT: 
v_cndmask_b32_e64 v6, 0, -1, s[4:5] 1626; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 1627; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc 1628; CGP-NEXT: v_cndmask_b32_e64 v3, -1, v6, s[4:5] 1629; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v7 1630; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v8, vcc 1631; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 1632; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc 1633; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 1634; CGP-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc 1635; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v6 1636; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v9, vcc 1637; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 1638; CGP-NEXT: v_cndmask_b32_e32 v2, v6, v4, vcc 1639; CGP-NEXT: v_cndmask_b32_e32 v4, v9, v5, vcc 1640; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 1641; CGP-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc 1642; CGP-NEXT: v_cndmask_b32_e32 v3, v8, v4, vcc 1643; CGP-NEXT: v_xor_b32_e32 v2, v2, v10 1644; CGP-NEXT: v_xor_b32_e32 v3, v3, v10 1645; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 1646; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc 1647; CGP-NEXT: s_setpc_b64 s[30:31] 1648 %result = sdiv <2 x i64> %num, <i64 1235195, i64 1235195> 1649 ret <2 x i64> %result 1650} 1651 ; Scalar i64 sdiv whose divisor is computed at run time as (4096 << %y). The plain CHECK prefix is shared by both RUN lines, so the GISEL and CGP configurations are expected to emit identical code for this function. ; NOTE(review): the checked code appears to take the .LBB7_4 path, which reads only the low dwords of the operands, when the OR of both operands' high dwords is zero, and the .LBB7_3 path otherwise -- confirm against the expansion before relying on this. 1652define i64 @v_sdiv_i64_pow2_shl_denom(i64 %x, i64 %y) { 1653; CHECK-LABEL: v_sdiv_i64_pow2_shl_denom: 1654; CHECK: ; %bb.0: 1655; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1656; CHECK-NEXT: v_mov_b32_e32 v3, v0 1657; CHECK-NEXT: v_mov_b32_e32 v4, v1 1658; CHECK-NEXT: v_mov_b32_e32 v0, 0x1000 1659; CHECK-NEXT: v_mov_b32_e32 v1, 0 1660; CHECK-NEXT: v_lshl_b64 v[5:6], v[0:1], v2 1661; CHECK-NEXT: v_mov_b32_e32 v0, 0 1662; CHECK-NEXT: v_or_b32_e32 v1, v4, v6 1663; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] 1664; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 1665; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc 1666; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 1667; CHECK-NEXT: s_cbranch_execnz .LBB7_3 1668; CHECK-NEXT: ; %bb.1: ; %Flow 1669; CHECK-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] 1670; CHECK-NEXT:
s_cbranch_execnz .LBB7_4 1671; CHECK-NEXT: .LBB7_2: 1672; CHECK-NEXT: s_or_b64 exec, exec, s[6:7] 1673; CHECK-NEXT: s_setpc_b64 s[30:31] 1674; CHECK-NEXT: .LBB7_3: 1675; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v6 1676; CHECK-NEXT: v_add_i32_e32 v1, vcc, v5, v0 1677; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v6, v0, vcc 1678; CHECK-NEXT: v_xor_b32_e32 v2, v1, v0 1679; CHECK-NEXT: v_xor_b32_e32 v1, v5, v0 1680; CHECK-NEXT: v_cvt_f32_u32_e32 v5, v2 1681; CHECK-NEXT: v_cvt_f32_u32_e32 v6, v1 1682; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v2 1683; CHECK-NEXT: v_subb_u32_e32 v10, vcc, 0, v1, vcc 1684; CHECK-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 1685; CHECK-NEXT: v_rcp_iflag_f32_e32 v5, v5 1686; CHECK-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 1687; CHECK-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 1688; CHECK-NEXT: v_trunc_f32_e32 v7, v6 1689; CHECK-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 1690; CHECK-NEXT: v_cvt_u32_f32_e32 v8, v5 1691; CHECK-NEXT: v_cvt_u32_f32_e32 v11, v7 1692; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v8, 0 1693; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[6:7] 1694; CHECK-NEXT: v_mul_hi_u32 v12, v8, v5 1695; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7] 1696; CHECK-NEXT: v_mul_lo_u32 v7, v11, v5 1697; CHECK-NEXT: v_mul_hi_u32 v5, v11, v5 1698; CHECK-NEXT: v_mul_lo_u32 v13, v8, v6 1699; CHECK-NEXT: v_mul_lo_u32 v14, v11, v6 1700; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v13 1701; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1702; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v12 1703; CHECK-NEXT: v_mul_hi_u32 v12, v8, v6 1704; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1705; CHECK-NEXT: v_add_i32_e32 v7, vcc, v13, v7 1706; CHECK-NEXT: v_add_i32_e32 v5, vcc, v14, v5 1707; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1708; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v12 1709; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1710; CHECK-NEXT: v_add_i32_e32 v12, vcc, v13, v12 1711; CHECK-NEXT: v_mul_hi_u32 v6, v11, v6 1712; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
1713; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1714; CHECK-NEXT: v_add_i32_e32 v7, vcc, v12, v7 1715; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 1716; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v5 1717; CHECK-NEXT: v_addc_u32_e32 v11, vcc, v11, v6, vcc 1718; CHECK-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v8, 0 1719; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v9, v11, v[6:7] 1720; CHECK-NEXT: v_ashrrev_i32_e32 v9, 31, v4 1721; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v9 1722; CHECK-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v10, v8, v[6:7] 1723; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v9, vcc 1724; CHECK-NEXT: v_xor_b32_e32 v7, v3, v9 1725; CHECK-NEXT: v_mul_lo_u32 v3, v11, v5 1726; CHECK-NEXT: v_mul_lo_u32 v10, v8, v6 1727; CHECK-NEXT: v_xor_b32_e32 v12, v4, v9 1728; CHECK-NEXT: v_mul_hi_u32 v4, v8, v5 1729; CHECK-NEXT: v_mul_hi_u32 v5, v11, v5 1730; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v10 1731; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1732; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 1733; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 1734; CHECK-NEXT: v_mul_lo_u32 v4, v11, v6 1735; CHECK-NEXT: v_add_i32_e32 v3, vcc, v10, v3 1736; CHECK-NEXT: v_mul_hi_u32 v10, v8, v6 1737; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 1738; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1739; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v10 1740; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1741; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10 1742; CHECK-NEXT: v_mul_hi_u32 v6, v11, v6 1743; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 1744; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 1745; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 1746; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 1747; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v3 1748; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v11, v4, vcc 1749; CHECK-NEXT: v_mul_lo_u32 v5, v12, v3 1750; CHECK-NEXT: v_mul_lo_u32 v6, v7, v4 1751; CHECK-NEXT: v_mul_hi_u32 v8, v7, v3 1752; CHECK-NEXT: v_mul_hi_u32 v3, v12, v3 1753; CHECK-NEXT: v_mul_hi_u32 v10, v12, v4 1754; CHECK-NEXT:
v_add_i32_e32 v5, vcc, v5, v6 1755; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1756; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 1757; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1758; CHECK-NEXT: v_mul_lo_u32 v8, v12, v4 1759; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 1760; CHECK-NEXT: v_mul_hi_u32 v6, v7, v4 1761; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v3 1762; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1763; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6 1764; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1765; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 1766; CHECK-NEXT: v_add_i32_e32 v8, vcc, v3, v5 1767; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v2, v8, 0 1768; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1769; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 1770; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v5 1771; CHECK-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v6, v[4:5] 1772; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v7, v3 1773; CHECK-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v8, v[4:5] 1774; CHECK-NEXT: v_subb_u32_e64 v5, s[4:5], v12, v4, vcc 1775; CHECK-NEXT: v_sub_i32_e64 v4, s[4:5], v12, v4 1776; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v1 1777; CHECK-NEXT: v_subb_u32_e32 v4, vcc, v4, v1, vcc 1778; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] 1779; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v2 1780; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v3, v2 1781; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] 1782; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v1 1783; CHECK-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc 1784; CHECK-NEXT: v_cndmask_b32_e64 v5, v7, v10, s[4:5] 1785; CHECK-NEXT: v_add_i32_e32 v7, vcc, 1, v8 1786; CHECK-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc 1787; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1 1788; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc 1789; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2 1790; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc 1791; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 1792; CHECK-NEXT: v_cndmask_b32_e32 v1, v11, v2, vcc 1793; CHECK-NEXT: v_add_i32_e32 v2,
vcc, 1, v7 1794; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v10, vcc 1795; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 1796; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc 1797; CHECK-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc 1798; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 1799; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc 1800; CHECK-NEXT: v_xor_b32_e32 v3, v9, v0 1801; CHECK-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc 1802; CHECK-NEXT: v_xor_b32_e32 v0, v1, v3 1803; CHECK-NEXT: v_xor_b32_e32 v1, v2, v3 1804; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 1805; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc 1806; CHECK-NEXT: ; implicit-def: $vgpr5_vgpr6 1807; CHECK-NEXT: ; implicit-def: $vgpr3 1808; CHECK-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] 1809; CHECK-NEXT: s_cbranch_execz .LBB7_2 1810; CHECK-NEXT: .LBB7_4: 1811; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v5 1812; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v5 1813; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 1814; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1815; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 1816; CHECK-NEXT: v_mul_lo_u32 v1, v1, v0 1817; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 1818; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 1819; CHECK-NEXT: v_mul_hi_u32 v0, v3, v0 1820; CHECK-NEXT: v_mul_lo_u32 v1, v0, v5 1821; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0 1822; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v3, v1 1823; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5 1824; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1825; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v5 1826; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1827; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0 1828; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5 1829; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1830; CHECK-NEXT: v_mov_b32_e32 v1, 0 1831; CHECK-NEXT: s_or_b64 exec, exec, s[6:7] 1832; CHECK-NEXT: s_setpc_b64 s[30:31] 1833 %shl.y = shl i64 4096, %y 1834 %r = sdiv i64 %x, %shl.y 1835 ret i64 %r 1836} 1837 1838define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { 1839;
GISEL-LABEL: v_sdiv_v2i64_pow2_shl_denom: 1840; GISEL: ; %bb.0: 1841; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1842; GISEL-NEXT: v_mov_b32_e32 v9, 0x1000 1843; GISEL-NEXT: v_mov_b32_e32 v10, 0 1844; GISEL-NEXT: v_lshl_b64 v[7:8], v[9:10], v4 1845; GISEL-NEXT: v_lshl_b64 v[9:10], v[9:10], v6 1846; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v8 1847; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v4 1848; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v8, v4, vcc 1849; GISEL-NEXT: v_xor_b32_e32 v8, v5, v4 1850; GISEL-NEXT: v_xor_b32_e32 v5, v7, v4 1851; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v8 1852; GISEL-NEXT: v_cvt_f32_u32_e32 v11, v5 1853; GISEL-NEXT: v_sub_i32_e32 v15, vcc, 0, v8 1854; GISEL-NEXT: v_subb_u32_e32 v16, vcc, 0, v5, vcc 1855; GISEL-NEXT: v_mac_f32_e32 v7, 0x4f800000, v11 1856; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v7 1857; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v10 1858; GISEL-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 1859; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v7 1860; GISEL-NEXT: v_trunc_f32_e32 v13, v11 1861; GISEL-NEXT: v_mac_f32_e32 v7, 0xcf800000, v13 1862; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v7 1863; GISEL-NEXT: v_cvt_u32_f32_e32 v17, v13 1864; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v14, 0 1865; GISEL-NEXT: v_mov_b32_e32 v7, v12 1866; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v15, v17, v[7:8] 1867; GISEL-NEXT: v_mul_lo_u32 v7, v17, v11 1868; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v16, v14, v[12:13] 1869; GISEL-NEXT: v_mul_lo_u32 v13, v14, v12 1870; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13 1871; GISEL-NEXT: v_mul_hi_u32 v13, v14, v11 1872; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 1873; GISEL-NEXT: v_mul_hi_u32 v11, v17, v11 1874; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13 1875; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1876; GISEL-NEXT: v_mul_lo_u32 v13, v17, v12 1877; GISEL-NEXT: v_add_i32_e32 v7, vcc, v18, v7 1878; GISEL-NEXT: v_mul_hi_u32 v18, v14, v12 1879; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11 1880; GISEL-NEXT: 
v_cndmask_b32_e64 v13, 0, 1, vcc 1881; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v18 1882; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 1883; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v18 1884; GISEL-NEXT: v_mul_hi_u32 v12, v17, v12 1885; GISEL-NEXT: v_add_i32_e32 v7, vcc, v11, v7 1886; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1887; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11 1888; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 1889; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v7 1890; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v17, v11, vcc 1891; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v14, 0 1892; GISEL-NEXT: v_mov_b32_e32 v7, v12 1893; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v15, v17, v[7:8] 1894; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v1 1895; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7 1896; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v16, v14, v[12:13] 1897; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc 1898; GISEL-NEXT: v_xor_b32_e32 v15, v0, v7 1899; GISEL-NEXT: v_mul_lo_u32 v0, v17, v11 1900; GISEL-NEXT: v_mul_lo_u32 v13, v14, v12 1901; GISEL-NEXT: v_xor_b32_e32 v16, v1, v7 1902; GISEL-NEXT: v_mul_hi_u32 v1, v14, v11 1903; GISEL-NEXT: v_mul_hi_u32 v11, v17, v11 1904; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13 1905; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1906; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 1907; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1908; GISEL-NEXT: v_mul_lo_u32 v1, v17, v12 1909; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v0 1910; GISEL-NEXT: v_mul_hi_u32 v13, v14, v12 1911; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11 1912; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1913; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13 1914; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1915; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 1916; GISEL-NEXT: v_mul_hi_u32 v12, v17, v12 1917; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 1918; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1919; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 1920; GISEL-NEXT: 
v_add_i32_e32 v1, vcc, v12, v1 1921; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0 1922; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc 1923; GISEL-NEXT: v_mul_lo_u32 v11, v16, v0 1924; GISEL-NEXT: v_mul_lo_u32 v12, v15, v1 1925; GISEL-NEXT: v_mul_hi_u32 v13, v15, v0 1926; GISEL-NEXT: v_mul_hi_u32 v0, v16, v0 1927; GISEL-NEXT: v_xor_b32_e32 v7, v7, v4 1928; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 1929; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1930; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 1931; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1932; GISEL-NEXT: v_mul_lo_u32 v13, v16, v1 1933; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 1934; GISEL-NEXT: v_mul_hi_u32 v12, v15, v1 1935; GISEL-NEXT: v_add_i32_e32 v0, vcc, v13, v0 1936; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1937; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12 1938; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1939; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v12 1940; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11 1941; GISEL-NEXT: v_mul_hi_u32 v1, v16, v1 1942; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v8, v0, 0 1943; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 1944; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 1945; GISEL-NEXT: v_add_i32_e32 v14, vcc, v1, v13 1946; GISEL-NEXT: v_mov_b32_e32 v1, v12 1947; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v8, v14, v[1:2] 1948; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v6 1949; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v10, v6, vcc 1950; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v5, v0, v[12:13] 1951; GISEL-NEXT: v_xor_b32_e32 v10, v1, v6 1952; GISEL-NEXT: v_xor_b32_e32 v9, v9, v6 1953; GISEL-NEXT: v_cvt_f32_u32_e32 v1, v10 1954; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v9 1955; GISEL-NEXT: v_sub_i32_e32 v15, vcc, v15, v11 1956; GISEL-NEXT: v_sub_i32_e64 v11, s[4:5], v16, v12 1957; GISEL-NEXT: v_mac_f32_e32 v1, 0x4f800000, v13 1958; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v1 1959; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], v16, v12, vcc 1960; GISEL-NEXT: v_subb_u32_e32 
v13, vcc, v11, v5, vcc 1961; GISEL-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 1962; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v1 1963; GISEL-NEXT: v_trunc_f32_e32 v16, v11 1964; GISEL-NEXT: v_mac_f32_e32 v1, 0xcf800000, v16 1965; GISEL-NEXT: v_cvt_u32_f32_e32 v18, v1 1966; GISEL-NEXT: v_sub_i32_e32 v19, vcc, 0, v10 1967; GISEL-NEXT: v_subb_u32_e32 v20, vcc, 0, v9, vcc 1968; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[6:7], v19, v18, 0 1969; GISEL-NEXT: v_cvt_u32_f32_e32 v16, v16 1970; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v15, v8 1971; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v15, v8 1972; GISEL-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v13, vcc 1973; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v8 1974; GISEL-NEXT: v_mov_b32_e32 v1, v12 1975; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[6:7], v19, v16, v[1:2] 1976; GISEL-NEXT: v_mul_lo_u32 v1, v16, v11 1977; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v15, v5 1978; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[6:7], v20, v18, v[12:13] 1979; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, -1, s[4:5] 1980; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v17, v5 1981; GISEL-NEXT: v_mul_lo_u32 v8, v18, v12 1982; GISEL-NEXT: v_add_i32_e64 v1, s[6:7], v1, v8 1983; GISEL-NEXT: v_mul_hi_u32 v8, v18, v11 1984; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[6:7] 1985; GISEL-NEXT: v_mul_hi_u32 v11, v16, v11 1986; GISEL-NEXT: v_add_i32_e64 v1, s[6:7], v1, v8 1987; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[8:9] 1988; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc 1989; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v15, v5 1990; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v17, v5 1991; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[8:9] 1992; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc 1993; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v0 1994; GISEL-NEXT: v_cndmask_b32_e64 v5, v15, v21, s[4:5] 1995; GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v14, vcc 1996; GISEL-NEXT: v_add_i32_e32 v17, vcc, 1, v8 1997; GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v15, vcc 1998; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 1999; GISEL-NEXT: 
v_cndmask_b32_e32 v1, v8, v17, vcc 2000; GISEL-NEXT: v_cndmask_b32_e32 v8, v15, v21, vcc 2001; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7] 2002; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 2003; GISEL-NEXT: v_mul_lo_u32 v15, v16, v12 2004; GISEL-NEXT: v_add_i32_e32 v11, vcc, v15, v11 2005; GISEL-NEXT: v_mul_hi_u32 v15, v18, v12 2006; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 2007; GISEL-NEXT: v_mul_hi_u32 v12, v16, v12 2008; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15 2009; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 2010; GISEL-NEXT: v_add_i32_e32 v15, vcc, v17, v15 2011; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 2012; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 2013; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13 2014; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 2015; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v11 2016; GISEL-NEXT: v_addc_u32_e32 v15, vcc, v16, v12, vcc 2017; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v19, v13, 0 2018; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 2019; GISEL-NEXT: v_cndmask_b32_e32 v5, v0, v1, vcc 2020; GISEL-NEXT: v_mov_b32_e32 v0, v12 2021; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v19, v15, v[0:1] 2022; GISEL-NEXT: v_ashrrev_i32_e32 v12, 31, v3 2023; GISEL-NEXT: v_cndmask_b32_e32 v8, v14, v8, vcc 2024; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v20, v13, v[0:1] 2025; GISEL-NEXT: v_add_i32_e32 v1, vcc, v2, v12 2026; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v3, v12, vcc 2027; GISEL-NEXT: v_xor_b32_e32 v14, v1, v12 2028; GISEL-NEXT: v_mul_lo_u32 v1, v15, v11 2029; GISEL-NEXT: v_mul_lo_u32 v3, v13, v0 2030; GISEL-NEXT: v_xor_b32_e32 v16, v2, v12 2031; GISEL-NEXT: v_mul_hi_u32 v2, v13, v11 2032; GISEL-NEXT: v_mul_hi_u32 v4, v15, v11 2033; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3 2034; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 2035; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v2 2036; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 2037; GISEL-NEXT: v_mul_lo_u32 v2, v15, v0 2038; GISEL-NEXT: v_add_i32_e32 v1, vcc, v3, v1 2039; 
GISEL-NEXT: v_mul_hi_u32 v3, v13, v0 2040; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4 2041; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 2042; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 2043; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 2044; GISEL-NEXT: v_add_i32_e32 v3, vcc, v4, v3 2045; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0 2046; GISEL-NEXT: v_add_i32_e32 v1, vcc, v2, v1 2047; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2048; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 2049; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 2050; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 2051; GISEL-NEXT: v_addc_u32_e32 v0, vcc, v15, v0, vcc 2052; GISEL-NEXT: v_mul_lo_u32 v2, v16, v1 2053; GISEL-NEXT: v_mul_lo_u32 v3, v14, v0 2054; GISEL-NEXT: v_mul_hi_u32 v4, v14, v1 2055; GISEL-NEXT: v_mul_hi_u32 v1, v16, v1 2056; GISEL-NEXT: v_xor_b32_e32 v5, v5, v7 2057; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 2058; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 2059; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4 2060; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2061; GISEL-NEXT: v_mul_lo_u32 v4, v16, v0 2062; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 2063; GISEL-NEXT: v_mul_hi_u32 v3, v14, v0 2064; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1 2065; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 2066; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3 2067; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 2068; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v3 2069; GISEL-NEXT: v_add_i32_e32 v11, vcc, v1, v2 2070; GISEL-NEXT: v_mul_hi_u32 v0, v16, v0 2071; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v11, 0 2072; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 2073; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1 2074; GISEL-NEXT: v_add_i32_e32 v13, vcc, v0, v1 2075; GISEL-NEXT: v_mov_b32_e32 v0, v3 2076; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v13, v[0:1] 2077; GISEL-NEXT: v_xor_b32_e32 v8, v8, v7 2078; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v5, v7 2079; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v11, v[3:4] 2080; GISEL-NEXT: 
v_subb_u32_e32 v1, vcc, v8, v7, vcc 2081; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v14, v2 2082; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v16, v3, vcc 2083; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v16, v3 2084; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v9 2085; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc 2086; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] 2087; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v10 2088; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 2089; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] 2090; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v9 2091; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 2092; GISEL-NEXT: v_cndmask_b32_e64 v4, v5, v7, s[4:5] 2093; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v11 2094; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v13, vcc 2095; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v9 2096; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc 2097; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v10 2098; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc 2099; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v9 2100; GISEL-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc 2101; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5 2102; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v7, vcc 2103; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 2104; GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v3, vcc 2105; GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v8, vcc 2106; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 2107; GISEL-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc 2108; GISEL-NEXT: v_xor_b32_e32 v4, v12, v6 2109; GISEL-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc 2110; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 2111; GISEL-NEXT: v_xor_b32_e32 v3, v3, v4 2112; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 2113; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v4, vcc 2114; GISEL-NEXT: s_setpc_b64 s[30:31] 2115; 2116; CGP-LABEL: v_sdiv_v2i64_pow2_shl_denom: 2117; CGP: ; %bb.0: 2118; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2119; CGP-NEXT: v_mov_b32_e32 v5, v2 2120; CGP-NEXT: v_mov_b32_e32 v7, v3 2121; CGP-NEXT: v_mov_b32_e32 v2, 0x1000 2122; CGP-NEXT: 
v_mov_b32_e32 v3, 0 2123; CGP-NEXT: v_lshl_b64 v[11:12], v[2:3], v4 2124; CGP-NEXT: v_mov_b32_e32 v9, v1 2125; CGP-NEXT: v_mov_b32_e32 v8, v0 2126; CGP-NEXT: v_or_b32_e32 v1, v9, v12 2127; CGP-NEXT: v_mov_b32_e32 v0, 0 2128; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] 2129; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 2130; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc 2131; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 2132; CGP-NEXT: s_cbranch_execz .LBB8_2 2133; CGP-NEXT: ; %bb.1: 2134; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v12 2135; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v0 2136; CGP-NEXT: v_addc_u32_e32 v10, vcc, v12, v0, vcc 2137; CGP-NEXT: v_xor_b32_e32 v4, v1, v0 2138; CGP-NEXT: v_xor_b32_e32 v1, v10, v0 2139; CGP-NEXT: v_cvt_f32_u32_e32 v10, v4 2140; CGP-NEXT: v_cvt_f32_u32_e32 v11, v1 2141; CGP-NEXT: v_sub_i32_e32 v14, vcc, 0, v4 2142; CGP-NEXT: v_subb_u32_e32 v15, vcc, 0, v1, vcc 2143; CGP-NEXT: v_mac_f32_e32 v10, 0x4f800000, v11 2144; CGP-NEXT: v_rcp_iflag_f32_e32 v10, v10 2145; CGP-NEXT: v_mul_f32_e32 v10, 0x5f7ffffc, v10 2146; CGP-NEXT: v_mul_f32_e32 v11, 0x2f800000, v10 2147; CGP-NEXT: v_trunc_f32_e32 v12, v11 2148; CGP-NEXT: v_mac_f32_e32 v10, 0xcf800000, v12 2149; CGP-NEXT: v_cvt_u32_f32_e32 v13, v10 2150; CGP-NEXT: v_cvt_u32_f32_e32 v16, v12 2151; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0 2152; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[11:12] 2153; CGP-NEXT: v_mul_hi_u32 v17, v13, v10 2154; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12] 2155; CGP-NEXT: v_mul_lo_u32 v12, v16, v10 2156; CGP-NEXT: v_mul_hi_u32 v10, v16, v10 2157; CGP-NEXT: v_mul_lo_u32 v18, v13, v11 2158; CGP-NEXT: v_mul_lo_u32 v19, v16, v11 2159; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v18 2160; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 2161; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v17 2162; CGP-NEXT: v_mul_hi_u32 v17, v13, v11 2163; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 2164; CGP-NEXT: v_add_i32_e32 v12, vcc, v18, v12 2165; CGP-NEXT: v_add_i32_e32 v10, vcc, 
v19, v10 2166; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 2167; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v17 2168; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 2169; CGP-NEXT: v_add_i32_e32 v17, vcc, v18, v17 2170; CGP-NEXT: v_mul_hi_u32 v11, v16, v11 2171; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 2172; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 2173; CGP-NEXT: v_add_i32_e32 v12, vcc, v17, v12 2174; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 2175; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v10 2176; CGP-NEXT: v_addc_u32_e32 v16, vcc, v16, v11, vcc 2177; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0 2178; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v14, v16, v[11:12] 2179; CGP-NEXT: v_ashrrev_i32_e32 v14, 31, v9 2180; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v14 2181; CGP-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v15, v13, v[11:12] 2182; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v14, vcc 2183; CGP-NEXT: v_xor_b32_e32 v12, v8, v14 2184; CGP-NEXT: v_mul_lo_u32 v8, v16, v10 2185; CGP-NEXT: v_mul_lo_u32 v15, v13, v11 2186; CGP-NEXT: v_xor_b32_e32 v17, v9, v14 2187; CGP-NEXT: v_mul_hi_u32 v9, v13, v10 2188; CGP-NEXT: v_mul_hi_u32 v10, v16, v10 2189; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v15 2190; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 2191; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 2192; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 2193; CGP-NEXT: v_mul_lo_u32 v9, v16, v11 2194; CGP-NEXT: v_add_i32_e32 v8, vcc, v15, v8 2195; CGP-NEXT: v_mul_hi_u32 v15, v13, v11 2196; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 2197; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 2198; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15 2199; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 2200; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v15 2201; CGP-NEXT: v_mul_hi_u32 v11, v16, v11 2202; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 2203; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 2204; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 2205; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 2206; CGP-NEXT: v_add_i32_e32 v8, vcc, v13, v8 2207; 
CGP-NEXT: v_addc_u32_e32 v9, vcc, v16, v9, vcc 2208; CGP-NEXT: v_mul_lo_u32 v10, v17, v8 2209; CGP-NEXT: v_mul_lo_u32 v11, v12, v9 2210; CGP-NEXT: v_mul_hi_u32 v13, v12, v8 2211; CGP-NEXT: v_mul_hi_u32 v8, v17, v8 2212; CGP-NEXT: v_mul_hi_u32 v15, v17, v9 2213; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 2214; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 2215; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 2216; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 2217; CGP-NEXT: v_mul_lo_u32 v13, v17, v9 2218; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 2219; CGP-NEXT: v_mul_hi_u32 v11, v12, v9 2220; CGP-NEXT: v_add_i32_e32 v8, vcc, v13, v8 2221; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 2222; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 2223; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 2224; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 2225; CGP-NEXT: v_add_i32_e32 v13, vcc, v8, v10 2226; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v4, v13, 0 2227; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 2228; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 2229; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v10 2230; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v4, v11, v[9:10] 2231; CGP-NEXT: v_sub_i32_e32 v8, vcc, v12, v8 2232; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v1, v13, v[9:10] 2233; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v17, v9, vcc 2234; CGP-NEXT: v_sub_i32_e64 v9, s[4:5], v17, v9 2235; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v1 2236; CGP-NEXT: v_subb_u32_e32 v9, vcc, v9, v1, vcc 2237; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] 2238; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v4 2239; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v4 2240; CGP-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[4:5] 2241; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v1 2242; CGP-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v9, vcc 2243; CGP-NEXT: v_cndmask_b32_e64 v10, v12, v15, s[4:5] 2244; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v13 2245; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v11, vcc 2246; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v9, v1 2247; CGP-NEXT: v_cndmask_b32_e64 
v16, 0, -1, vcc 2248; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4 2249; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc 2250; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v9, v1 2251; CGP-NEXT: v_cndmask_b32_e32 v1, v16, v4, vcc 2252; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v12 2253; CGP-NEXT: v_addc_u32_e32 v8, vcc, 0, v15, vcc 2254; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 2255; CGP-NEXT: v_cndmask_b32_e32 v1, v12, v4, vcc 2256; CGP-NEXT: v_cndmask_b32_e32 v4, v15, v8, vcc 2257; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 2258; CGP-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc 2259; CGP-NEXT: v_xor_b32_e32 v8, v14, v0 2260; CGP-NEXT: v_cndmask_b32_e32 v4, v11, v4, vcc 2261; CGP-NEXT: v_xor_b32_e32 v0, v1, v8 2262; CGP-NEXT: v_xor_b32_e32 v1, v4, v8 2263; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 2264; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc 2265; CGP-NEXT: ; implicit-def: $vgpr11_vgpr12 2266; CGP-NEXT: ; implicit-def: $vgpr8 2267; CGP-NEXT: .LBB8_2: ; %Flow1 2268; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7] 2269; CGP-NEXT: v_lshl_b64 v[9:10], v[2:3], v6 2270; CGP-NEXT: s_xor_b64 exec, exec, s[6:7] 2271; CGP-NEXT: s_cbranch_execz .LBB8_4 2272; CGP-NEXT: ; %bb.3: 2273; CGP-NEXT: v_cvt_f32_u32_e32 v0, v11 2274; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v11 2275; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 2276; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2277; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 2278; CGP-NEXT: v_mul_lo_u32 v1, v1, v0 2279; CGP-NEXT: v_mul_hi_u32 v1, v0, v1 2280; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 2281; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 2282; CGP-NEXT: v_mul_lo_u32 v1, v0, v11 2283; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0 2284; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v1 2285; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v11 2286; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2287; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v11 2288; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 2289; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0 2290; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v11 2291; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, 
vcc 2292; CGP-NEXT: v_mov_b32_e32 v1, 0 2293; CGP-NEXT: .LBB8_4: 2294; CGP-NEXT: s_or_b64 exec, exec, s[6:7] 2295; CGP-NEXT: v_or_b32_e32 v3, v7, v10 2296; CGP-NEXT: v_mov_b32_e32 v2, 0 2297; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] 2298; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 2299; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc 2300; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 2301; CGP-NEXT: s_cbranch_execnz .LBB8_7 2302; CGP-NEXT: ; %bb.5: ; %Flow 2303; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] 2304; CGP-NEXT: s_cbranch_execnz .LBB8_8 2305; CGP-NEXT: .LBB8_6: 2306; CGP-NEXT: s_or_b64 exec, exec, s[6:7] 2307; CGP-NEXT: s_setpc_b64 s[30:31] 2308; CGP-NEXT: .LBB8_7: 2309; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v10 2310; CGP-NEXT: v_add_i32_e32 v3, vcc, v9, v2 2311; CGP-NEXT: v_addc_u32_e32 v6, vcc, v10, v2, vcc 2312; CGP-NEXT: v_xor_b32_e32 v4, v3, v2 2313; CGP-NEXT: v_xor_b32_e32 v3, v6, v2 2314; CGP-NEXT: v_cvt_f32_u32_e32 v6, v4 2315; CGP-NEXT: v_cvt_f32_u32_e32 v8, v3 2316; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v4 2317; CGP-NEXT: v_subb_u32_e32 v13, vcc, 0, v3, vcc 2318; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v8 2319; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v6 2320; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 2321; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 2322; CGP-NEXT: v_trunc_f32_e32 v10, v8 2323; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v10 2324; CGP-NEXT: v_cvt_u32_f32_e32 v11, v6 2325; CGP-NEXT: v_cvt_u32_f32_e32 v14, v10 2326; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v11, 0 2327; CGP-NEXT: v_mov_b32_e32 v6, v9 2328; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v12, v14, v[6:7] 2329; CGP-NEXT: v_mul_lo_u32 v6, v14, v8 2330; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v11, v[9:10] 2331; CGP-NEXT: v_mul_hi_u32 v10, v11, v8 2332; CGP-NEXT: v_mul_hi_u32 v8, v14, v8 2333; CGP-NEXT: v_mul_lo_u32 v15, v11, v9 2334; CGP-NEXT: v_mul_lo_u32 v16, v14, v9 2335; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v15 2336; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 2337; CGP-NEXT: 
v_add_i32_e32 v6, vcc, v6, v10 2338; CGP-NEXT: v_mul_hi_u32 v10, v11, v9 2339; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 2340; CGP-NEXT: v_add_i32_e32 v6, vcc, v15, v6 2341; CGP-NEXT: v_add_i32_e32 v8, vcc, v16, v8 2342; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 2343; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 2344; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 2345; CGP-NEXT: v_add_i32_e32 v10, vcc, v15, v10 2346; CGP-NEXT: v_mul_hi_u32 v9, v14, v9 2347; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 2348; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 2349; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 2350; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 2351; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v6 2352; CGP-NEXT: v_addc_u32_e32 v14, vcc, v14, v8, vcc 2353; CGP-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v11, 0 2354; CGP-NEXT: v_mov_b32_e32 v6, v9 2355; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v12, v14, v[6:7] 2356; CGP-NEXT: v_ashrrev_i32_e32 v12, 31, v7 2357; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12 2358; CGP-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v11, v[9:10] 2359; CGP-NEXT: v_addc_u32_e32 v6, vcc, v7, v12, vcc 2360; CGP-NEXT: v_xor_b32_e32 v10, v5, v12 2361; CGP-NEXT: v_mul_lo_u32 v5, v14, v8 2362; CGP-NEXT: v_mul_lo_u32 v7, v11, v9 2363; CGP-NEXT: v_xor_b32_e32 v13, v6, v12 2364; CGP-NEXT: v_mul_hi_u32 v6, v11, v8 2365; CGP-NEXT: v_mul_hi_u32 v8, v14, v8 2366; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 2367; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 2368; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 2369; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 2370; CGP-NEXT: v_mul_lo_u32 v6, v14, v9 2371; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 2372; CGP-NEXT: v_mul_hi_u32 v7, v11, v9 2373; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 2374; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 2375; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 2376; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 2377; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 2378; CGP-NEXT: v_mul_hi_u32 v8, v14, v9 2379; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 
2380; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 2381; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 2382; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 2383; CGP-NEXT: v_add_i32_e32 v5, vcc, v11, v5 2384; CGP-NEXT: v_addc_u32_e32 v6, vcc, v14, v6, vcc 2385; CGP-NEXT: v_mul_lo_u32 v7, v13, v5 2386; CGP-NEXT: v_mul_lo_u32 v8, v10, v6 2387; CGP-NEXT: v_mul_hi_u32 v9, v10, v5 2388; CGP-NEXT: v_mul_hi_u32 v5, v13, v5 2389; CGP-NEXT: v_mul_hi_u32 v11, v13, v6 2390; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 2391; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 2392; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 2393; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 2394; CGP-NEXT: v_mul_lo_u32 v9, v13, v6 2395; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 2396; CGP-NEXT: v_mul_hi_u32 v8, v10, v6 2397; CGP-NEXT: v_add_i32_e32 v5, vcc, v9, v5 2398; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 2399; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 2400; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 2401; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 2402; CGP-NEXT: v_add_i32_e32 v9, vcc, v5, v7 2403; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v9, 0 2404; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 2405; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 2406; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v7 2407; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v4, v8, v[6:7] 2408; CGP-NEXT: v_sub_i32_e32 v5, vcc, v10, v5 2409; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v3, v9, v[6:7] 2410; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v13, v6, vcc 2411; CGP-NEXT: v_sub_i32_e64 v6, s[4:5], v13, v6 2412; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v3 2413; CGP-NEXT: v_subb_u32_e32 v6, vcc, v6, v3, vcc 2414; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] 2415; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v4 2416; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v4 2417; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] 2418; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v3 2419; CGP-NEXT: v_subbrev_u32_e32 v6, vcc, 0, v6, vcc 2420; CGP-NEXT: v_cndmask_b32_e64 v7, v10, v11, s[4:5] 2421; CGP-NEXT: 
v_add_i32_e32 v10, vcc, 1, v9 2422; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v8, vcc 2423; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v6, v3 2424; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc 2425; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v4 2426; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc 2427; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v6, v3 2428; CGP-NEXT: v_cndmask_b32_e32 v3, v13, v4, vcc 2429; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v10 2430; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v11, vcc 2431; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 2432; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v4, vcc 2433; CGP-NEXT: v_cndmask_b32_e32 v4, v11, v5, vcc 2434; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 2435; CGP-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc 2436; CGP-NEXT: v_xor_b32_e32 v5, v12, v2 2437; CGP-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc 2438; CGP-NEXT: v_xor_b32_e32 v2, v3, v5 2439; CGP-NEXT: v_xor_b32_e32 v3, v4, v5 2440; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 2441; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v5, vcc 2442; CGP-NEXT: ; implicit-def: $vgpr9_vgpr10 2443; CGP-NEXT: ; implicit-def: $vgpr5 2444; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] 2445; CGP-NEXT: s_cbranch_execz .LBB8_6 2446; CGP-NEXT: .LBB8_8: 2447; CGP-NEXT: v_cvt_f32_u32_e32 v2, v9 2448; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v9 2449; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2 2450; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 2451; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2 2452; CGP-NEXT: v_mul_lo_u32 v3, v3, v2 2453; CGP-NEXT: v_mul_hi_u32 v3, v2, v3 2454; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 2455; CGP-NEXT: v_mul_hi_u32 v2, v5, v2 2456; CGP-NEXT: v_mul_lo_u32 v3, v2, v9 2457; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2 2458; CGP-NEXT: v_sub_i32_e32 v3, vcc, v5, v3 2459; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v9 2460; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2461; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v9 2462; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 2463; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2 2464; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v9 2465; CGP-NEXT: 
v_cndmask_b32_e32 v2, v2, v4, vcc 2466; CGP-NEXT: v_mov_b32_e32 v3, 0 2467; CGP-NEXT: s_or_b64 exec, exec, s[6:7] 2468; CGP-NEXT: s_setpc_b64 s[30:31] 2469 %shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y 2470 %r = sdiv <2 x i64> %x, %shl.y 2471 ret <2 x i64> %r 2472} 2473
; Scalar i64 sdiv where both operands are first ANDed with 16777215 (0xffffff),
; so each is a non-negative value that fits in 24 bits. The GISEL and CGP checks
; below both expect a short v_rcp-based 32-bit sequence here; they differ in the
; final write to v1 (a literal 0 versus v_ashrrev_i32 of v0 by 31). v1 is
; presumably the high half of the returned i64 -- confirm against the ABI.
2474define i64 @v_sdiv_i64_24bit(i64 %num, i64 %den) { 2475; GISEL-LABEL: v_sdiv_i64_24bit: 2476; GISEL: ; %bb.0: 2477; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2478; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v2 2479; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1 2480; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 2481; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0 2482; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 2483; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 2484; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 2485; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2 2486; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3 2487; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 2488; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2 2489; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 2490; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v2 2491; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 2492; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 2493; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2494; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v1 2495; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2496; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v2 2497; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 2498; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc 2499; GISEL-NEXT: v_mov_b32_e32 v1, 0 2500; GISEL-NEXT: s_setpc_b64 s[30:31] 2501; 2502; CGP-LABEL: v_sdiv_i64_24bit: 2503; CGP: ; %bb.0: 2504; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2505; CGP-NEXT: v_and_b32_e32 v3, 0xffffff, v2 2506; CGP-NEXT: v_cvt_f32_u32_e32 v1, v3 2507; CGP-NEXT: v_and_b32_e32 v5, 0xffffff, v0 2508; CGP-NEXT: v_rcp_f32_e32 v1, v1 2509; CGP-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 2510; CGP-NEXT: v_cvt_u32_f32_e32 v4, v1 2511; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v3 2512; CGP-NEXT: v_mul_lo_u32 v1, v1, v4 2513; CGP-NEXT: 
v_mad_u64_u32 v[1:2], s[4:5], v4, v1, 0 2514; CGP-NEXT: v_add_i32_e32 v0, vcc, v4, v2 2515; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v0, 0 2516; CGP-NEXT: v_mul_lo_u32 v0, v1, v3 2517; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v1 2518; CGP-NEXT: v_sub_i32_e32 v0, vcc, v5, v0 2519; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 2520; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 2521; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v0, v3 2522; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2523; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v1 2524; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 2525; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc 2526; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v0 2527; CGP-NEXT: s_setpc_b64 s[30:31] 2528 %num.mask = and i64 %num, 16777215 2529 %den.mask = and i64 %den, 16777215 2530 %result = sdiv i64 %num.mask, %den.mask 2531 ret i64 %result 2532} 2533
; <2 x i64> variant of the 24-bit test, with every lane of both operands ANDed
; with 16777215. The GISEL checks for this function are a long multi-word
; sequence (v_mad_u64_u32 plus multiplies by a literal 0 operand), while the CGP
; checks use the short v_rcp_f32 sequence once per lane.
2534define <2 x i64> @v_sdiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { 2535; GISEL-LABEL: v_sdiv_v2i64_24bit: 2536; GISEL: ; %bb.0: 2537; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2538; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v4 2539; GISEL-NEXT: v_cvt_f32_u32_e32 v3, v1 2540; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 2541; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v1 2542; GISEL-NEXT: v_mac_f32_e32 v3, 0x4f800000, v5 2543; GISEL-NEXT: v_rcp_iflag_f32_e32 v3, v3 2544; GISEL-NEXT: v_subb_u32_e64 v11, s[4:5], 0, 0, vcc 2545; GISEL-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 2546; GISEL-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 2547; GISEL-NEXT: v_trunc_f32_e32 v7, v4 2548; GISEL-NEXT: v_mac_f32_e32 v3, 0xcf800000, v7 2549; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v3 2550; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v7 2551; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v9, 0 2552; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[4:5] 2553; GISEL-NEXT: v_mul_lo_u32 v4, v12, v3 2554; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v9, v[7:8] 2555; GISEL-NEXT: v_mul_hi_u32 v8, v9, v3 2556; GISEL-NEXT: v_mul_hi_u32 v3, v12, v3 2557; GISEL-NEXT: 
v_mul_lo_u32 v13, v9, v7 2558; GISEL-NEXT: v_mul_lo_u32 v14, v12, v7 2559; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v13 2560; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 2561; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 2562; GISEL-NEXT: v_mul_hi_u32 v8, v9, v7 2563; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 2564; GISEL-NEXT: v_add_i32_e32 v4, vcc, v13, v4 2565; GISEL-NEXT: v_add_i32_e32 v3, vcc, v14, v3 2566; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 2567; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v8 2568; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 2569; GISEL-NEXT: v_add_i32_e32 v8, vcc, v13, v8 2570; GISEL-NEXT: v_mul_hi_u32 v7, v12, v7 2571; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4 2572; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 2573; GISEL-NEXT: v_add_i32_e32 v4, vcc, v8, v4 2574; GISEL-NEXT: v_add_i32_e32 v4, vcc, v7, v4 2575; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v3 2576; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v12, v4, vcc 2577; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v9, 0 2578; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v10, v12, v[4:5] 2579; GISEL-NEXT: v_mul_lo_u32 v4, v12, v3 2580; GISEL-NEXT: v_and_b32_e32 v10, 0xffffff, v0 2581; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v9, v[7:8] 2582; GISEL-NEXT: v_mul_hi_u32 v0, v9, v3 2583; GISEL-NEXT: v_mul_hi_u32 v3, v12, v3 2584; GISEL-NEXT: v_mul_lo_u32 v8, v9, v7 2585; GISEL-NEXT: v_and_b32_e32 v11, 0xffffff, v2 2586; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 2587; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 2588; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0 2589; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 2590; GISEL-NEXT: v_mul_lo_u32 v4, v12, v7 2591; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 2592; GISEL-NEXT: v_mul_hi_u32 v8, v9, v7 2593; GISEL-NEXT: v_add_i32_e32 v3, vcc, v4, v3 2594; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 2595; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v8 2596; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 2597; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 2598; GISEL-NEXT: 
v_mul_hi_u32 v7, v12, v7 2599; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 2600; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 2601; GISEL-NEXT: v_add_i32_e32 v3, vcc, v4, v3 2602; GISEL-NEXT: v_add_i32_e32 v3, vcc, v7, v3 2603; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 2604; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v12, v3, vcc 2605; GISEL-NEXT: v_mul_lo_u32 v7, 0, v0 2606; GISEL-NEXT: v_mul_lo_u32 v8, v10, v4 2607; GISEL-NEXT: v_and_b32_e32 v3, 0xffffff, v6 2608; GISEL-NEXT: v_mul_hi_u32 v6, v10, v0 2609; GISEL-NEXT: v_mul_hi_u32 v0, 0, v0 2610; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 2611; GISEL-NEXT: v_mul_lo_u32 v8, 0, v4 2612; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 2613; GISEL-NEXT: v_mul_hi_u32 v7, v10, v4 2614; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 2615; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 2616; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7 2617; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 2618; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 2619; GISEL-NEXT: v_mul_hi_u32 v4, 0, v4 2620; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 2621; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v1, v0, 0 2622; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 2623; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v3 2624; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 2625; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v1, v4, v[7:8] 2626; GISEL-NEXT: v_mac_f32_e32 v9, 0x4f800000, v5 2627; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v9 2628; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], 0, v0, v[7:8] 2629; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v10, v6 2630; GISEL-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 2631; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v2 2632; GISEL-NEXT: v_subb_u32_e64 v9, s[4:5], 0, v7, vcc 2633; GISEL-NEXT: v_sub_i32_e64 v10, s[4:5], 0, v7 2634; GISEL-NEXT: v_trunc_f32_e32 v7, v5 2635; GISEL-NEXT: v_mac_f32_e32 v2, 0xcf800000, v7 2636; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v2 2637; GISEL-NEXT: v_sub_i32_e64 v13, s[4:5], 0, v3 2638; GISEL-NEXT: v_subb_u32_e64 v14, s[4:5], 0, 0, s[4:5] 2639; GISEL-NEXT: 
v_mad_u64_u32 v[5:6], s[4:5], v13, v12, 0 2640; GISEL-NEXT: v_cvt_u32_f32_e32 v15, v7 2641; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v1 2642; GISEL-NEXT: v_mov_b32_e32 v2, v6 2643; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[4:5] 2644; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v13, v15, v[2:3] 2645; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 2646; GISEL-NEXT: v_cndmask_b32_e64 v9, -1, v16, s[4:5] 2647; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v14, v12, v[6:7] 2648; GISEL-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v10, vcc 2649; GISEL-NEXT: v_mul_lo_u32 v7, v15, v5 2650; GISEL-NEXT: v_mul_lo_u32 v10, v12, v6 2651; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v8, v1 2652; GISEL-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v2, vcc 2653; GISEL-NEXT: v_mul_hi_u32 v2, v12, v5 2654; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 2655; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 2656; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2 2657; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2658; GISEL-NEXT: v_mul_lo_u32 v7, v15, v6 2659; GISEL-NEXT: v_mul_hi_u32 v5, v15, v5 2660; GISEL-NEXT: v_add_i32_e32 v2, vcc, v10, v2 2661; GISEL-NEXT: v_mul_hi_u32 v10, v12, v6 2662; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 2663; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 2664; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 2665; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 2666; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 2667; GISEL-NEXT: v_mul_hi_u32 v6, v15, v6 2668; GISEL-NEXT: v_add_i32_e32 v2, vcc, v5, v2 2669; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 2670; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 2671; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5 2672; GISEL-NEXT: v_add_i32_e32 v7, vcc, v12, v2 2673; GISEL-NEXT: v_addc_u32_e32 v10, vcc, v15, v5, vcc 2674; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v7, 0 2675; GISEL-NEXT: v_add_i32_e32 v12, vcc, 1, v0 2676; GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v4, vcc 2677; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v1 2678; GISEL-NEXT: v_mov_b32_e32 v1, v6 2679; GISEL-NEXT: 
v_mad_u64_u32 v[1:2], s[4:5], v13, v10, v[1:2] 2680; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc 2681; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16 2682; GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v14, v7, v[1:2] 2683; GISEL-NEXT: v_cndmask_b32_e32 v6, -1, v8, vcc 2684; GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v12 2685; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v15, vcc 2686; GISEL-NEXT: v_mul_lo_u32 v13, v10, v5 2687; GISEL-NEXT: v_mul_lo_u32 v14, v7, v1 2688; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 2689; GISEL-NEXT: v_mul_hi_u32 v6, v7, v5 2690; GISEL-NEXT: v_cndmask_b32_e32 v2, v12, v2, vcc 2691; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v13, v14 2692; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 2693; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v12, v6 2694; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] 2695; GISEL-NEXT: v_mul_lo_u32 v12, v10, v1 2696; GISEL-NEXT: v_mul_hi_u32 v5, v10, v5 2697; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v13, v6 2698; GISEL-NEXT: v_mul_hi_u32 v13, v7, v1 2699; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v12, v5 2700; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 2701; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13 2702; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 2703; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v13 2704; GISEL-NEXT: v_mul_hi_u32 v1, v10, v1 2705; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v6 2706; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] 2707; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v12, v6 2708; GISEL-NEXT: v_add_i32_e64 v1, s[4:5], v1, v6 2709; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v7, v5 2710; GISEL-NEXT: v_addc_u32_e64 v1, s[4:5], v10, v1, s[4:5] 2711; GISEL-NEXT: v_mul_lo_u32 v6, 0, v5 2712; GISEL-NEXT: v_mul_lo_u32 v7, v11, v1 2713; GISEL-NEXT: v_mul_hi_u32 v10, v11, v5 2714; GISEL-NEXT: v_cndmask_b32_e32 v8, v15, v8, vcc 2715; GISEL-NEXT: v_mul_hi_u32 v5, 0, v5 2716; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 2717; GISEL-NEXT: v_mul_lo_u32 v7, 0, v1 2718; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 2719; GISEL-NEXT: 
v_mul_hi_u32 v10, v11, v1 2720; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 2721; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 2722; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 2723; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 2724; GISEL-NEXT: v_add_i32_e32 v10, vcc, v5, v6 2725; GISEL-NEXT: v_mul_hi_u32 v1, 0, v1 2726; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v10, 0 2727; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 2728; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v12 2729; GISEL-NEXT: v_add_i32_e32 v12, vcc, v1, v7 2730; GISEL-NEXT: v_mov_b32_e32 v1, v6 2731; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v3, v12, v[1:2] 2732; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 2733; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2734; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], 0, v10, v[6:7] 2735; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v8, vcc 2736; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v5 2737; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], 0, v6 2738; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], 0, v6, vcc 2739; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc 2740; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v3 2741; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 2742; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] 2743; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 2744; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc 2745; GISEL-NEXT: v_cndmask_b32_e64 v4, -1, v6, s[4:5] 2746; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v10 2747; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v12, vcc 2748; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v3 2749; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc 2750; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 2751; GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc 2752; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v6 2753; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc 2754; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 2755; GISEL-NEXT: v_cndmask_b32_e32 v2, v6, v3, vcc 2756; GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc 2757; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 2758; GISEL-NEXT: v_cndmask_b32_e32 v2, v10, 
v2, vcc 2759; GISEL-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc 2760; GISEL-NEXT: s_setpc_b64 s[30:31] 2761; 2762; CGP-LABEL: v_sdiv_v2i64_24bit: 2763; CGP: ; %bb.0: 2764; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2765; CGP-NEXT: v_and_b32_e32 v3, 0xffffff, v4 2766; CGP-NEXT: v_cvt_f32_u32_e32 v1, v3 2767; CGP-NEXT: v_and_b32_e32 v4, 0xffffff, v6 2768; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v3 2769; CGP-NEXT: v_rcp_f32_e32 v1, v1 2770; CGP-NEXT: v_and_b32_e32 v8, 0xffffff, v0 2771; CGP-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 2772; CGP-NEXT: v_cvt_u32_f32_e32 v5, v1 2773; CGP-NEXT: v_cvt_f32_u32_e32 v1, v4 2774; CGP-NEXT: v_mul_lo_u32 v6, v6, v5 2775; CGP-NEXT: v_rcp_f32_e32 v7, v1 2776; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v6, 0 2777; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v7 2778; CGP-NEXT: v_cvt_u32_f32_e32 v6, v0 2779; CGP-NEXT: v_add_i32_e32 v0, vcc, v5, v1 2780; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v0, 0 2781; CGP-NEXT: v_sub_i32_e32 v0, vcc, 0, v4 2782; CGP-NEXT: v_mul_lo_u32 v5, v1, v3 2783; CGP-NEXT: v_mul_lo_u32 v0, v0, v6 2784; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v1 2785; CGP-NEXT: v_sub_i32_e32 v5, vcc, v8, v5 2786; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v3 2787; CGP-NEXT: v_cndmask_b32_e32 v7, v1, v7, vcc 2788; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v0, 0 2789; CGP-NEXT: v_and_b32_e32 v8, 0xffffff, v2 2790; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v5, v3 2791; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v6, v1 2792; CGP-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v8, v1, 0 2793; CGP-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc 2794; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v7 2795; CGP-NEXT: v_mul_lo_u32 v5, v2, v4 2796; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 2797; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v1, vcc 2798; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v0 2799; CGP-NEXT: v_sub_i32_e32 v3, vcc, v8, v5 2800; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2 2801; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v4 2802; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc 2803; CGP-NEXT: v_sub_i32_e64 
v5, s[4:5], v3, v4 2804; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 2805; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2 2806; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v4 2807; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc 2808; CGP-NEXT: v_ashrrev_i32_e32 v3, 31, v2 2809; CGP-NEXT: s_setpc_b64 s[30:31] 2810 %num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215> 2811 %den.mask = and <2 x i64> %den, <i64 16777215, i64 16777215> 2812 %result = sdiv <2 x i64> %num.mask, %den.mask 2813 ret <2 x i64> %result 2814} 2815
; sdiv exact by the constant 4096 (2^12). Both RUN configurations share the
; common check prefix here and expect a single v_ashr_i64 by 12.
2816define i64 @v_sdiv_i64_exact(i64 %num) { 2817; CHECK-LABEL: v_sdiv_i64_exact: 2818; CHECK: ; %bb.0: 2819; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2820; CHECK-NEXT: v_ashr_i64 v[0:1], v[0:1], 12 2821; CHECK-NEXT: s_setpc_b64 s[30:31] 2822 %result = sdiv exact i64 %num, 4096 2823 ret i64 %result 2824} 2825
; <2 x i64> sdiv exact by the per-lane constants <4096, 1024>; the expected code
; is one v_ashr_i64 per lane, shifting by 12 and by 10 respectively.
2826define <2 x i64> @v_sdiv_v2i64_exact(<2 x i64> %num) { 2827; CHECK-LABEL: v_sdiv_v2i64_exact: 2828; CHECK: ; %bb.0: 2829; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2830; CHECK-NEXT: v_ashr_i64 v[0:1], v[0:1], 12 2831; CHECK-NEXT: v_ashr_i64 v[2:3], v[2:3], 10 2832; CHECK-NEXT: s_setpc_b64 s[30:31] 2833 %result = sdiv exact <2 x i64> %num, <i64 4096, i64 1024> 2834 ret <2 x i64> %result 2835} 2836