1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -amdgpu-bypass-slow-div=0 -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=GFX8 %s 3; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -amdgpu-bypass-slow-div=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s 4; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -amdgpu-bypass-slow-div=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s 5 6define amdgpu_kernel void @sdivrem_i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i32 %x, i32 %y) { 7; GFX8-LABEL: sdivrem_i32: 8; GFX8: ; %bb.0: 9; GFX8-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10 10; GFX8-NEXT: s_waitcnt lgkmcnt(0) 11; GFX8-NEXT: s_ashr_i32 s6, s5, 31 12; GFX8-NEXT: s_add_i32 s0, s5, s6 13; GFX8-NEXT: s_xor_b32 s5, s0, s6 14; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s5 15; GFX8-NEXT: s_sub_i32 s0, 0, s5 16; GFX8-NEXT: s_ashr_i32 s7, s4, 31 17; GFX8-NEXT: s_add_i32 s4, s4, s7 18; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 19; GFX8-NEXT: s_xor_b32 s4, s4, s7 20; GFX8-NEXT: s_xor_b32 s6, s7, s6 21; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 22; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 23; GFX8-NEXT: v_mul_lo_u32 v1, s0, v0 24; GFX8-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 25; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 26; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 27; GFX8-NEXT: v_mul_hi_u32 v2, s4, v0 28; GFX8-NEXT: s_waitcnt lgkmcnt(0) 29; GFX8-NEXT: v_mov_b32_e32 v0, s0 30; GFX8-NEXT: v_mov_b32_e32 v1, s1 31; GFX8-NEXT: v_mul_lo_u32 v3, v2, s5 32; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v2 33; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s4, v3 34; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s5, v3 35; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 36; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s5, v3 37; GFX8-NEXT: 
v_cndmask_b32_e32 v3, v3, v4, vcc 38; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v2 39; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s5, v3 40; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 41; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s5, v3 42; GFX8-NEXT: v_xor_b32_e32 v2, s6, v2 43; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 44; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s6, v2 45; GFX8-NEXT: v_xor_b32_e32 v3, s7, v3 46; GFX8-NEXT: flat_store_dword v[0:1], v2 47; GFX8-NEXT: v_mov_b32_e32 v0, s2 48; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, s7, v3 49; GFX8-NEXT: v_mov_b32_e32 v1, s3 50; GFX8-NEXT: flat_store_dword v[0:1], v3 51; GFX8-NEXT: s_endpgm 52; 53; GFX9-LABEL: sdivrem_i32: 54; GFX9: ; %bb.0: 55; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x10 56; GFX9-NEXT: v_mov_b32_e32 v2, 0 57; GFX9-NEXT: s_waitcnt lgkmcnt(0) 58; GFX9-NEXT: s_ashr_i32 s4, s1, 31 59; GFX9-NEXT: s_add_i32 s1, s1, s4 60; GFX9-NEXT: s_xor_b32 s5, s1, s4 61; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s5 62; GFX9-NEXT: s_sub_i32 s1, 0, s5 63; GFX9-NEXT: s_ashr_i32 s6, s0, 31 64; GFX9-NEXT: s_add_i32 s0, s0, s6 65; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 66; GFX9-NEXT: s_xor_b32 s7, s0, s6 67; GFX9-NEXT: s_xor_b32 s4, s6, s4 68; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 69; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 70; GFX9-NEXT: v_mul_lo_u32 v1, s1, v0 71; GFX9-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 72; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 73; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 74; GFX9-NEXT: v_mul_hi_u32 v0, s7, v0 75; GFX9-NEXT: v_mul_lo_u32 v1, v0, s5 76; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 77; GFX9-NEXT: v_sub_u32_e32 v1, s7, v1 78; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s5, v1 79; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 80; GFX9-NEXT: v_subrev_u32_e32 v3, s5, v1 81; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 82; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 83; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s5, v1 84; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 85; GFX9-NEXT: v_subrev_u32_e32 v3, s5, v1 86; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 87; 
GFX9-NEXT: v_xor_b32_e32 v0, s4, v0 88; GFX9-NEXT: v_subrev_u32_e32 v0, s4, v0 89; GFX9-NEXT: v_xor_b32_e32 v1, s6, v1 90; GFX9-NEXT: v_subrev_u32_e32 v1, s6, v1 91; GFX9-NEXT: s_waitcnt lgkmcnt(0) 92; GFX9-NEXT: global_store_dword v2, v0, s[0:1] 93; GFX9-NEXT: global_store_dword v2, v1, s[2:3] 94; GFX9-NEXT: s_endpgm 95; 96; GFX10-LABEL: sdivrem_i32: 97; GFX10: ; %bb.0: 98; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x10 99; GFX10-NEXT: s_waitcnt lgkmcnt(0) 100; GFX10-NEXT: s_ashr_i32 s4, s1, 31 101; GFX10-NEXT: s_ashr_i32 s6, s0, 31 102; GFX10-NEXT: s_add_i32 s1, s1, s4 103; GFX10-NEXT: s_add_i32 s0, s0, s6 104; GFX10-NEXT: s_xor_b32 s5, s1, s4 105; GFX10-NEXT: s_xor_b32 s0, s0, s6 106; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s5 107; GFX10-NEXT: s_sub_i32 s1, 0, s5 108; GFX10-NEXT: s_xor_b32 s4, s6, s4 109; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 110; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 111; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 112; GFX10-NEXT: v_mul_lo_u32 v1, s1, v0 113; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 114; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 115; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0 116; GFX10-NEXT: v_mul_lo_u32 v1, v0, s5 117; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 118; GFX10-NEXT: v_sub_nc_u32_e32 v1, s0, v1 119; GFX10-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 120; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s5, v1 121; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s5, v1 122; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 123; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 124; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 125; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s5, v1 126; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s5, v1 127; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 128; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 129; GFX10-NEXT: v_mov_b32_e32 v2, 0 130; GFX10-NEXT: v_xor_b32_e32 v0, s4, v0 131; GFX10-NEXT: v_xor_b32_e32 v1, s6, v1 132; GFX10-NEXT: v_subrev_nc_u32_e32 v0, s4, v0 133; GFX10-NEXT: v_subrev_nc_u32_e32 v1, s6, v1 134; GFX10-NEXT: s_waitcnt 
lgkmcnt(0) 135; GFX10-NEXT: global_store_dword v2, v0, s[0:1] 136; GFX10-NEXT: global_store_dword v2, v1, s[2:3] 137; GFX10-NEXT: s_endpgm 138 %div = sdiv i32 %x, %y 139 store i32 %div, ptr addrspace(1) %out0 140 %rem = srem i32 %x, %y 141 store i32 %rem, ptr addrspace(1) %out1 142 ret void 143} 144 145define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i64 %x, i64 %y) { 146; GFX8-LABEL: sdivrem_i64: 147; GFX8: ; %bb.0: 148; GFX8-NEXT: s_load_dwordx8 s[4:11], s[8:9], 0x0 149; GFX8-NEXT: s_waitcnt lgkmcnt(0) 150; GFX8-NEXT: s_ashr_i32 s2, s9, 31 151; GFX8-NEXT: s_ashr_i32 s12, s11, 31 152; GFX8-NEXT: s_add_u32 s0, s8, s2 153; GFX8-NEXT: s_addc_u32 s1, s9, s2 154; GFX8-NEXT: s_add_u32 s8, s10, s12 155; GFX8-NEXT: s_mov_b32 s13, s12 156; GFX8-NEXT: s_addc_u32 s9, s11, s12 157; GFX8-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] 158; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s9 159; GFX8-NEXT: v_cvt_f32_u32_e32 v1, s8 160; GFX8-NEXT: s_mov_b32 s3, s2 161; GFX8-NEXT: s_xor_b64 s[10:11], s[0:1], s[2:3] 162; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 163; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 164; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 165; GFX8-NEXT: s_sub_u32 s14, 0, s8 166; GFX8-NEXT: s_subb_u32 s15, 0, s9 167; GFX8-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 168; GFX8-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 169; GFX8-NEXT: v_trunc_f32_e32 v2, v1 170; GFX8-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2 171; GFX8-NEXT: v_add_f32_e32 v0, v1, v0 172; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v0 173; GFX8-NEXT: v_cvt_u32_f32_e32 v4, v2 174; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0 175; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2] 176; GFX8-NEXT: v_mul_hi_u32 v5, v3, v0 177; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2] 178; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 179; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 180; GFX8-NEXT: v_mul_lo_u32 v6, v3, v1 181; GFX8-NEXT: v_mul_lo_u32 v7, v4, v1 182; GFX8-NEXT: v_mul_hi_u32 v8, v3, v1 183; GFX8-NEXT: v_mul_hi_u32 
v1, v4, v1 184; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 185; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 186; GFX8-NEXT: v_add_u32_e32 v0, vcc, v7, v0 187; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 188; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 189; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 190; GFX8-NEXT: v_add_u32_e32 v2, vcc, v6, v2 191; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v8 192; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 193; GFX8-NEXT: v_add_u32_e32 v5, vcc, v7, v5 194; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 195; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 196; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 197; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 198; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v0 199; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc 200; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s14, v3, 0 201; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s14, v4, v[1:2] 202; GFX8-NEXT: v_mul_hi_u32 v6, v3, v0 203; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s15, v3, v[1:2] 204; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 205; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 206; GFX8-NEXT: v_mul_lo_u32 v5, v3, v1 207; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 208; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 209; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 210; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 211; GFX8-NEXT: v_mul_lo_u32 v6, v4, v1 212; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 213; GFX8-NEXT: v_mul_hi_u32 v5, v3, v1 214; GFX8-NEXT: v_add_u32_e32 v0, vcc, v6, v0 215; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 216; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v5 217; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 218; GFX8-NEXT: v_add_u32_e32 v5, vcc, v6, v5 219; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 220; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 221; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 222; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 223; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 224; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0 225; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc 226; GFX8-NEXT: 
v_mul_lo_u32 v2, s11, v0 227; GFX8-NEXT: v_mul_lo_u32 v3, s10, v1 228; GFX8-NEXT: v_mul_hi_u32 v4, s10, v0 229; GFX8-NEXT: v_mul_hi_u32 v0, s11, v0 230; GFX8-NEXT: v_mul_hi_u32 v5, s11, v1 231; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 232; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 233; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 234; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 235; GFX8-NEXT: v_mul_lo_u32 v4, s11, v1 236; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 237; GFX8-NEXT: v_mul_hi_u32 v3, s10, v1 238; GFX8-NEXT: v_add_u32_e32 v0, vcc, v4, v0 239; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 240; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 241; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 242; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 243; GFX8-NEXT: v_add_u32_e32 v4, vcc, v0, v2 244; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v4, 0 245; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 246; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 247; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v2 248; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v3, v[1:2] 249; GFX8-NEXT: v_mov_b32_e32 v6, s11 250; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s10, v0 251; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v4, v[1:2] 252; GFX8-NEXT: v_mov_b32_e32 v5, s9 253; GFX8-NEXT: v_subb_u32_e64 v2, s[0:1], v6, v1, vcc 254; GFX8-NEXT: v_sub_u32_e64 v1, s[0:1], s11, v1 255; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v2 256; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] 257; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v0 258; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1] 259; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v2 260; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc 261; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[0:1] 262; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, s8, v0 263; GFX8-NEXT: v_subbrev_u32_e64 v8, s[0:1], 0, v1, vcc 264; GFX8-NEXT: v_add_u32_e64 v9, s[0:1], 1, v4 265; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v3, s[0:1] 266; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v8 267; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] 
268; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v7 269; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc 270; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] 271; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v8 272; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, s8, v7 273; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[0:1] 274; GFX8-NEXT: v_add_u32_e64 v12, s[0:1], 1, v9 275; GFX8-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 276; GFX8-NEXT: v_addc_u32_e64 v13, s[0:1], 0, v10, s[0:1] 277; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 278; GFX8-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc 279; GFX8-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc 280; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6 281; GFX8-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc 282; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc 283; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v9, s[0:1] 284; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v10, s[0:1] 285; GFX8-NEXT: v_cndmask_b32_e64 v5, v0, v5, s[0:1] 286; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v1, s[0:1] 287; GFX8-NEXT: s_xor_b64 s[0:1], s[2:3], s[12:13] 288; GFX8-NEXT: v_xor_b32_e32 v0, s0, v4 289; GFX8-NEXT: v_xor_b32_e32 v1, s1, v3 290; GFX8-NEXT: v_mov_b32_e32 v3, s1 291; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s0, v0 292; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc 293; GFX8-NEXT: v_xor_b32_e32 v3, s2, v5 294; GFX8-NEXT: v_xor_b32_e32 v4, s2, v2 295; GFX8-NEXT: v_mov_b32_e32 v5, s2 296; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s2, v3 297; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v4, v5, vcc 298; GFX8-NEXT: v_mov_b32_e32 v4, s4 299; GFX8-NEXT: v_mov_b32_e32 v5, s5 300; GFX8-NEXT: flat_store_dwordx2 v[4:5], v[0:1] 301; GFX8-NEXT: v_mov_b32_e32 v0, s6 302; GFX8-NEXT: v_mov_b32_e32 v1, s7 303; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 304; GFX8-NEXT: s_endpgm 305; 306; GFX9-LABEL: sdivrem_i64: 307; GFX9: ; %bb.0: 308; GFX9-NEXT: s_load_dwordx8 s[12:19], s[8:9], 0x0 309; GFX9-NEXT: s_waitcnt lgkmcnt(0) 310; GFX9-NEXT: s_ashr_i32 s2, s17, 31 311; GFX9-NEXT: s_ashr_i32 s4, s19, 31 312; GFX9-NEXT: s_add_u32 s0, s16, 
s2 313; GFX9-NEXT: s_addc_u32 s1, s17, s2 314; GFX9-NEXT: s_add_u32 s6, s18, s4 315; GFX9-NEXT: s_mov_b32 s5, s4 316; GFX9-NEXT: s_addc_u32 s7, s19, s4 317; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[4:5] 318; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s7 319; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s6 320; GFX9-NEXT: s_mov_b32 s3, s2 321; GFX9-NEXT: s_xor_b64 s[8:9], s[0:1], s[2:3] 322; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 323; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 324; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 325; GFX9-NEXT: s_sub_u32 s10, 0, s6 326; GFX9-NEXT: s_subb_u32 s11, 0, s7 327; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 328; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 329; GFX9-NEXT: v_trunc_f32_e32 v2, v1 330; GFX9-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2 331; GFX9-NEXT: v_add_f32_e32 v0, v1, v0 332; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v0 333; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v2 334; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s10, v3, 0 335; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s10, v4, v[1:2] 336; GFX9-NEXT: v_mul_hi_u32 v5, v3, v0 337; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v3, v[1:2] 338; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 339; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 340; GFX9-NEXT: v_mul_lo_u32 v6, v3, v1 341; GFX9-NEXT: v_mul_lo_u32 v7, v4, v1 342; GFX9-NEXT: v_mul_hi_u32 v8, v3, v1 343; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 344; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 345; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 346; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v7, v0 347; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 348; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 349; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 350; GFX9-NEXT: v_add_u32_e32 v2, v6, v2 351; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v8 352; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 353; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 354; GFX9-NEXT: v_add_u32_e32 v5, v7, v5 355; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 356; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 357; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, 
v0 358; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v1, vcc 359; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s10, v3, 0 360; GFX9-NEXT: v_mov_b32_e32 v7, s7 361; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s10, v4, v[1:2] 362; GFX9-NEXT: v_mul_hi_u32 v6, v3, v0 363; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s11, v3, v[1:2] 364; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 365; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 366; GFX9-NEXT: v_mul_lo_u32 v5, v3, v1 367; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 368; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 369; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 370; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 371; GFX9-NEXT: v_mul_lo_u32 v6, v4, v1 372; GFX9-NEXT: v_add_u32_e32 v2, v5, v2 373; GFX9-NEXT: v_mul_hi_u32 v5, v3, v1 374; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 375; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v0 376; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 377; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5 378; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 379; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 380; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 381; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 382; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 383; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v3, v0 384; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v4, v1, vcc 385; GFX9-NEXT: v_mul_lo_u32 v2, s9, v0 386; GFX9-NEXT: v_mul_lo_u32 v3, s8, v1 387; GFX9-NEXT: v_mul_hi_u32 v4, s8, v0 388; GFX9-NEXT: v_mul_hi_u32 v0, s9, v0 389; GFX9-NEXT: v_mul_hi_u32 v6, s9, v1 390; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 391; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 392; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 393; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 394; GFX9-NEXT: v_mul_lo_u32 v4, s9, v1 395; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 396; GFX9-NEXT: v_mul_hi_u32 v3, s8, v1 397; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v4, v0 398; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 399; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 400; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 401; GFX9-NEXT: 
v_add_co_u32_e32 v5, vcc, v0, v2 402; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s6, v5, 0 403; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 404; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 405; GFX9-NEXT: v_add3_u32 v3, v3, v2, v6 406; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s6, v3, v[1:2] 407; GFX9-NEXT: v_mov_b32_e32 v6, s9 408; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s8, v0 409; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s7, v5, v[1:2] 410; GFX9-NEXT: v_mov_b32_e32 v4, 0 411; GFX9-NEXT: v_subb_co_u32_e64 v2, s[0:1], v6, v1, vcc 412; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s7, v2 413; GFX9-NEXT: v_sub_u32_e32 v1, s9, v1 414; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] 415; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s6, v0 416; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[0:1] 417; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s7, v2 418; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v7, vcc 419; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v8, s[0:1] 420; GFX9-NEXT: v_subrev_co_u32_e32 v8, vcc, s6, v0 421; GFX9-NEXT: v_subbrev_co_u32_e64 v9, s[0:1], 0, v1, vcc 422; GFX9-NEXT: v_add_co_u32_e64 v10, s[0:1], 1, v5 423; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v3, s[0:1] 424; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s7, v9 425; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] 426; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s6, v8 427; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v7, vcc 428; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[0:1] 429; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s7, v9 430; GFX9-NEXT: v_subrev_co_u32_e32 v7, vcc, s6, v8 431; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, v13, s[0:1] 432; GFX9-NEXT: v_add_co_u32_e64 v13, s[0:1], 1, v10 433; GFX9-NEXT: v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc 434; GFX9-NEXT: v_addc_co_u32_e64 v14, s[0:1], 0, v11, s[0:1] 435; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 436; GFX9-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc 437; GFX9-NEXT: v_cndmask_b32_e32 v11, v11, v14, vcc 438; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6 439; GFX9-NEXT: v_cndmask_b32_e32 v6, v8, v7, vcc 440; GFX9-NEXT: 
v_cndmask_b32_e32 v1, v9, v1, vcc 441; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[0:1] 442; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[0:1] 443; GFX9-NEXT: v_cndmask_b32_e64 v6, v0, v6, s[0:1] 444; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v1, s[0:1] 445; GFX9-NEXT: s_xor_b64 s[0:1], s[2:3], s[4:5] 446; GFX9-NEXT: v_xor_b32_e32 v0, s0, v5 447; GFX9-NEXT: v_xor_b32_e32 v1, s1, v3 448; GFX9-NEXT: v_mov_b32_e32 v3, s1 449; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s0, v0 450; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc 451; GFX9-NEXT: v_xor_b32_e32 v3, s2, v6 452; GFX9-NEXT: v_xor_b32_e32 v5, s2, v2 453; GFX9-NEXT: v_mov_b32_e32 v6, s2 454; GFX9-NEXT: v_subrev_co_u32_e32 v2, vcc, s2, v3 455; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v5, v6, vcc 456; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[12:13] 457; GFX9-NEXT: global_store_dwordx2 v4, v[2:3], s[14:15] 458; GFX9-NEXT: s_endpgm 459; 460; GFX10-LABEL: sdivrem_i64: 461; GFX10: ; %bb.0: 462; GFX10-NEXT: s_load_dwordx8 s[12:19], s[8:9], 0x0 463; GFX10-NEXT: s_waitcnt lgkmcnt(0) 464; GFX10-NEXT: s_ashr_i32 s2, s17, 31 465; GFX10-NEXT: s_ashr_i32 s4, s19, 31 466; GFX10-NEXT: s_add_u32 s0, s16, s2 467; GFX10-NEXT: s_addc_u32 s1, s17, s2 468; GFX10-NEXT: s_add_u32 s6, s18, s4 469; GFX10-NEXT: s_mov_b32 s5, s4 470; GFX10-NEXT: s_addc_u32 s7, s19, s4 471; GFX10-NEXT: s_mov_b32 s3, s2 472; GFX10-NEXT: s_xor_b64 s[6:7], s[6:7], s[4:5] 473; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] 474; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s7 475; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s6 476; GFX10-NEXT: s_sub_u32 s8, 0, s6 477; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 478; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 479; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 480; GFX10-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 481; GFX10-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 482; GFX10-NEXT: v_trunc_f32_e32 v2, v1 483; GFX10-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2 484; GFX10-NEXT: v_cvt_u32_f32_e32 v4, v2 485; GFX10-NEXT: v_add_f32_e32 v0, v1, v0 486; GFX10-NEXT: 
v_cvt_u32_f32_e32 v3, v0 487; GFX10-NEXT: v_mad_u64_u32 v[0:1], s9, s8, v3, 0 488; GFX10-NEXT: v_mad_u64_u32 v[1:2], s9, s8, v4, v[1:2] 489; GFX10-NEXT: s_subb_u32 s9, 0, s7 490; GFX10-NEXT: v_mul_hi_u32 v6, v4, v0 491; GFX10-NEXT: s_xor_b64 s[4:5], s[2:3], s[4:5] 492; GFX10-NEXT: v_mad_u64_u32 v[1:2], s10, s9, v3, v[1:2] 493; GFX10-NEXT: v_mul_lo_u32 v2, v4, v0 494; GFX10-NEXT: v_mul_hi_u32 v0, v3, v0 495; GFX10-NEXT: v_mul_lo_u32 v5, v3, v1 496; GFX10-NEXT: v_mul_lo_u32 v7, v4, v1 497; GFX10-NEXT: v_mul_hi_u32 v8, v3, v1 498; GFX10-NEXT: v_mul_hi_u32 v1, v4, v1 499; GFX10-NEXT: v_add_co_u32 v2, s10, v2, v5 500; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s10 501; GFX10-NEXT: v_add_co_u32 v6, s10, v7, v6 502; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s10 503; GFX10-NEXT: v_add_co_u32 v0, s10, v2, v0 504; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s10 505; GFX10-NEXT: v_add_co_u32 v2, s10, v6, v8 506; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s10 507; GFX10-NEXT: v_add_nc_u32_e32 v0, v5, v0 508; GFX10-NEXT: v_add_nc_u32_e32 v5, v7, v6 509; GFX10-NEXT: v_add_co_u32 v0, s10, v2, v0 510; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s10 511; GFX10-NEXT: v_add_co_u32 v3, vcc_lo, v3, v0 512; GFX10-NEXT: v_add3_u32 v1, v5, v2, v1 513; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, v4, v1, vcc_lo 514; GFX10-NEXT: v_mad_u64_u32 v[0:1], s10, s8, v3, 0 515; GFX10-NEXT: v_mad_u64_u32 v[1:2], s8, s8, v4, v[1:2] 516; GFX10-NEXT: v_mul_hi_u32 v6, v4, v0 517; GFX10-NEXT: v_mad_u64_u32 v[1:2], s8, s9, v3, v[1:2] 518; GFX10-NEXT: v_mul_lo_u32 v2, v4, v0 519; GFX10-NEXT: v_mul_hi_u32 v0, v3, v0 520; GFX10-NEXT: v_mul_lo_u32 v5, v3, v1 521; GFX10-NEXT: v_mul_lo_u32 v7, v4, v1 522; GFX10-NEXT: v_mul_hi_u32 v8, v3, v1 523; GFX10-NEXT: v_mul_hi_u32 v1, v4, v1 524; GFX10-NEXT: v_add_co_u32 v2, s8, v2, v5 525; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s8 526; GFX10-NEXT: v_add_co_u32 v6, s8, v7, v6 527; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s8 528; GFX10-NEXT: v_add_co_u32 v0, s8, v2, v0 529; GFX10-NEXT: 
v_cndmask_b32_e64 v0, 0, 1, s8 530; GFX10-NEXT: v_add_co_u32 v2, s8, v6, v8 531; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s8 532; GFX10-NEXT: v_add_nc_u32_e32 v0, v5, v0 533; GFX10-NEXT: v_add_nc_u32_e32 v5, v7, v6 534; GFX10-NEXT: v_add_co_u32 v0, s8, v2, v0 535; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s8 536; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v3, v0 537; GFX10-NEXT: v_add3_u32 v1, v5, v2, v1 538; GFX10-NEXT: v_mul_lo_u32 v2, s1, v0 539; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v4, v1, vcc_lo 540; GFX10-NEXT: v_mul_hi_u32 v4, s0, v0 541; GFX10-NEXT: v_mul_hi_u32 v0, s1, v0 542; GFX10-NEXT: v_mul_lo_u32 v3, s0, v1 543; GFX10-NEXT: v_mul_lo_u32 v5, s1, v1 544; GFX10-NEXT: v_add_co_u32 v2, s8, v2, v3 545; GFX10-NEXT: v_mul_hi_u32 v3, s0, v1 546; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s8 547; GFX10-NEXT: v_add_co_u32 v2, s8, v2, v4 548; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s8 549; GFX10-NEXT: v_add_co_u32 v0, s8, v5, v0 550; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s8 551; GFX10-NEXT: v_add_nc_u32_e32 v2, v6, v2 552; GFX10-NEXT: v_add_co_u32 v0, s8, v0, v3 553; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s8 554; GFX10-NEXT: v_add_co_u32 v5, s8, v0, v2 555; GFX10-NEXT: v_mul_hi_u32 v2, s1, v1 556; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s8 557; GFX10-NEXT: v_add_nc_u32_e32 v3, v4, v3 558; GFX10-NEXT: v_mad_u64_u32 v[0:1], s8, s6, v5, 0 559; GFX10-NEXT: v_add3_u32 v3, v3, v6, v2 560; GFX10-NEXT: v_mad_u64_u32 v[1:2], s8, s6, v3, v[1:2] 561; GFX10-NEXT: v_mad_u64_u32 v[1:2], s8, s7, v5, v[1:2] 562; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v5, 1 563; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v3, vcc_lo 564; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, s0, v0 565; GFX10-NEXT: v_sub_nc_u32_e32 v6, s1, v1 566; GFX10-NEXT: v_sub_co_ci_u32_e64 v1, s0, s1, v1, vcc_lo 567; GFX10-NEXT: v_subrev_co_ci_u32_e32 v6, vcc_lo, s7, v6, vcc_lo 568; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s6, v0 569; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc_lo 570; GFX10-NEXT: v_sub_co_u32 v8, vcc_lo, v0, s6 
571; GFX10-NEXT: v_subrev_co_ci_u32_e64 v9, s0, 0, v6, vcc_lo 572; GFX10-NEXT: v_cmp_le_u32_e64 s0, s7, v1 573; GFX10-NEXT: v_subrev_co_ci_u32_e32 v6, vcc_lo, s7, v6, vcc_lo 574; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, -1, s0 575; GFX10-NEXT: v_cmp_le_u32_e64 s0, s6, v8 576; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, -1, s0 577; GFX10-NEXT: v_cmp_le_u32_e64 s0, s7, v9 578; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, -1, s0 579; GFX10-NEXT: v_add_co_u32 v13, s0, v2, 1 580; GFX10-NEXT: v_add_co_ci_u32_e64 v14, s0, 0, v4, s0 581; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s7, v9 582; GFX10-NEXT: v_cndmask_b32_e64 v11, v12, v11, s0 583; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s7, v1 584; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v11 585; GFX10-NEXT: v_cndmask_b32_e64 v7, v10, v7, s0 586; GFX10-NEXT: v_sub_co_u32 v10, s0, v8, s6 587; GFX10-NEXT: v_subrev_co_ci_u32_e64 v6, s0, 0, v6, s0 588; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v13, vcc_lo 589; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v7 590; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v14, vcc_lo 591; GFX10-NEXT: v_cndmask_b32_e32 v7, v8, v10, vcc_lo 592; GFX10-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc_lo 593; GFX10-NEXT: v_cndmask_b32_e64 v2, v5, v2, s0 594; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v4, s0 595; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v7, s0 596; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v6, s0 597; GFX10-NEXT: v_mov_b32_e32 v4, 0 598; GFX10-NEXT: v_xor_b32_e32 v2, s4, v2 599; GFX10-NEXT: v_xor_b32_e32 v3, s5, v3 600; GFX10-NEXT: v_xor_b32_e32 v5, s2, v0 601; GFX10-NEXT: v_xor_b32_e32 v6, s2, v1 602; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v2, s4 603; GFX10-NEXT: v_subrev_co_ci_u32_e32 v1, vcc_lo, s5, v3, vcc_lo 604; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v5, s2 605; GFX10-NEXT: v_subrev_co_ci_u32_e32 v3, vcc_lo, s2, v6, vcc_lo 606; GFX10-NEXT: global_store_dwordx2 v4, v[0:1], s[12:13] 607; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[14:15] 608; GFX10-NEXT: s_endpgm 609 %div = sdiv i64 %x, %y 610 store i64 %div, ptr addrspace(1) %out0 611 %rem = srem i64 %x, 
%y 612 store i64 %rem, ptr addrspace(1) %out1 613 ret void 614} 615 616define amdgpu_kernel void @sdivrem_v2i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i32> %x, <2 x i32> %y) { 617; GFX8-LABEL: sdivrem_v2i32: 618; GFX8: ; %bb.0: 619; GFX8-NEXT: s_load_dwordx8 s[4:11], s[8:9], 0x0 620; GFX8-NEXT: s_waitcnt lgkmcnt(0) 621; GFX8-NEXT: s_ashr_i32 s2, s10, 31 622; GFX8-NEXT: s_add_i32 s0, s10, s2 623; GFX8-NEXT: s_xor_b32 s3, s0, s2 624; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s3 625; GFX8-NEXT: s_ashr_i32 s10, s11, 31 626; GFX8-NEXT: s_add_i32 s0, s11, s10 627; GFX8-NEXT: s_xor_b32 s11, s0, s10 628; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 629; GFX8-NEXT: s_sub_i32 s0, 0, s3 630; GFX8-NEXT: v_cvt_f32_u32_e32 v1, s11 631; GFX8-NEXT: s_ashr_i32 s12, s8, 31 632; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 633; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 634; GFX8-NEXT: v_rcp_iflag_f32_e32 v1, v1 635; GFX8-NEXT: s_sub_i32 s1, 0, s11 636; GFX8-NEXT: v_mul_lo_u32 v2, s0, v0 637; GFX8-NEXT: s_add_i32 s0, s8, s12 638; GFX8-NEXT: s_xor_b32 s0, s0, s12 639; GFX8-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 640; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2 641; GFX8-NEXT: v_cvt_u32_f32_e32 v1, v1 642; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 643; GFX8-NEXT: v_mul_hi_u32 v0, s0, v0 644; GFX8-NEXT: v_mul_lo_u32 v2, s1, v1 645; GFX8-NEXT: v_mul_lo_u32 v3, v0, s3 646; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v0 647; GFX8-NEXT: v_mul_hi_u32 v2, v1, v2 648; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s0, v3 649; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s3, v3 650; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 651; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s3, v3 652; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 653; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v0 654; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s3, v3 655; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 656; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s3, v3 657; GFX8-NEXT: s_xor_b32 s0, s12, s2 658; GFX8-NEXT: s_ashr_i32 s2, s9, 31 659; GFX8-NEXT: s_add_i32 s1, s9, s2 660; GFX8-NEXT: 
v_cndmask_b32_e32 v3, v3, v4, vcc 661; GFX8-NEXT: s_xor_b32 s1, s1, s2 662; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 663; GFX8-NEXT: v_mul_hi_u32 v1, s1, v1 664; GFX8-NEXT: v_xor_b32_e32 v2, s12, v3 665; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0 666; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s0, v0 667; GFX8-NEXT: v_mul_lo_u32 v3, v1, s11 668; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s12, v2 669; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v1 670; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s1, v3 671; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s11, v3 672; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 673; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s11, v3 674; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 675; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v1 676; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s11, v3 677; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 678; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s11, v3 679; GFX8-NEXT: s_xor_b32 s0, s2, s10 680; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 681; GFX8-NEXT: v_xor_b32_e32 v1, s0, v1 682; GFX8-NEXT: v_mov_b32_e32 v4, s4 683; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, s0, v1 684; GFX8-NEXT: v_mov_b32_e32 v5, s5 685; GFX8-NEXT: v_xor_b32_e32 v3, s2, v3 686; GFX8-NEXT: flat_store_dwordx2 v[4:5], v[0:1] 687; GFX8-NEXT: v_mov_b32_e32 v0, s6 688; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, s2, v3 689; GFX8-NEXT: v_mov_b32_e32 v1, s7 690; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 691; GFX8-NEXT: s_endpgm 692; 693; GFX9-LABEL: sdivrem_v2i32: 694; GFX9: ; %bb.0: 695; GFX9-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0 696; GFX9-NEXT: s_waitcnt lgkmcnt(0) 697; GFX9-NEXT: s_ashr_i32 s8, s6, 31 698; GFX9-NEXT: s_add_i32 s6, s6, s8 699; GFX9-NEXT: s_xor_b32 s6, s6, s8 700; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s6 701; GFX9-NEXT: s_ashr_i32 s9, s7, 31 702; GFX9-NEXT: s_add_i32 s7, s7, s9 703; GFX9-NEXT: s_xor_b32 s7, s7, s9 704; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 705; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s7 706; GFX9-NEXT: s_sub_i32 s12, 0, s6 707; GFX9-NEXT: s_ashr_i32 s10, s4, 31 708; GFX9-NEXT: 
v_mul_f32_e32 v0, 0x4f7ffffe, v0 709; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 710; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 711; GFX9-NEXT: s_add_i32 s4, s4, s10 712; GFX9-NEXT: s_xor_b32 s4, s4, s10 713; GFX9-NEXT: v_mul_lo_u32 v2, s12, v0 714; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 715; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 716; GFX9-NEXT: s_sub_i32 s12, 0, s7 717; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 718; GFX9-NEXT: s_ashr_i32 s11, s5, 31 719; GFX9-NEXT: v_mul_lo_u32 v3, s12, v1 720; GFX9-NEXT: s_add_i32 s5, s5, s11 721; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 722; GFX9-NEXT: v_mul_hi_u32 v0, s4, v0 723; GFX9-NEXT: v_mul_hi_u32 v2, v1, v3 724; GFX9-NEXT: s_xor_b32 s5, s5, s11 725; GFX9-NEXT: v_mul_lo_u32 v3, v0, s6 726; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 727; GFX9-NEXT: v_add_u32_e32 v2, 1, v0 728; GFX9-NEXT: v_mul_hi_u32 v1, s5, v1 729; GFX9-NEXT: v_sub_u32_e32 v3, s4, v3 730; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 731; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 732; GFX9-NEXT: v_subrev_u32_e32 v2, s6, v3 733; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc 734; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 735; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s6, v2 736; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 737; GFX9-NEXT: v_subrev_u32_e32 v3, s6, v2 738; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 739; GFX9-NEXT: v_mul_lo_u32 v3, v1, s7 740; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 741; GFX9-NEXT: s_xor_b32 s4, s10, s8 742; GFX9-NEXT: v_xor_b32_e32 v0, s4, v0 743; GFX9-NEXT: v_sub_u32_e32 v3, s5, v3 744; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v3 745; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 746; GFX9-NEXT: v_subrev_u32_e32 v4, s7, v3 747; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 748; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 749; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v3 750; GFX9-NEXT: v_subrev_u32_e32 v0, s4, v0 751; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 752; GFX9-NEXT: v_subrev_u32_e32 v4, s7, v3 753; GFX9-NEXT: s_xor_b32 s4, s11, s9 754; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 
755; GFX9-NEXT: v_xor_b32_e32 v1, s4, v1 756; GFX9-NEXT: v_xor_b32_e32 v2, s10, v2 757; GFX9-NEXT: v_subrev_u32_e32 v1, s4, v1 758; GFX9-NEXT: v_xor_b32_e32 v3, s11, v3 759; GFX9-NEXT: v_mov_b32_e32 v4, 0 760; GFX9-NEXT: v_subrev_u32_e32 v2, s10, v2 761; GFX9-NEXT: v_subrev_u32_e32 v3, s11, v3 762; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] 763; GFX9-NEXT: global_store_dwordx2 v4, v[2:3], s[2:3] 764; GFX9-NEXT: s_endpgm 765; 766; GFX10-LABEL: sdivrem_v2i32: 767; GFX10: ; %bb.0: 768; GFX10-NEXT: s_load_dwordx8 s[12:19], s[8:9], 0x0 769; GFX10-NEXT: s_waitcnt lgkmcnt(0) 770; GFX10-NEXT: s_ashr_i32 s1, s18, 31 771; GFX10-NEXT: s_ashr_i32 s2, s19, 31 772; GFX10-NEXT: s_add_i32 s0, s18, s1 773; GFX10-NEXT: s_add_i32 s3, s19, s2 774; GFX10-NEXT: s_xor_b32 s4, s0, s1 775; GFX10-NEXT: s_xor_b32 s3, s3, s2 776; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s4 777; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s3 778; GFX10-NEXT: s_sub_i32 s0, 0, s4 779; GFX10-NEXT: s_sub_i32 s5, 0, s3 780; GFX10-NEXT: s_ashr_i32 s6, s17, 31 781; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 782; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 783; GFX10-NEXT: s_add_i32 s7, s17, s6 784; GFX10-NEXT: s_xor_b32 s7, s7, s6 785; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 786; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 787; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 788; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 789; GFX10-NEXT: v_mul_lo_u32 v2, s0, v0 790; GFX10-NEXT: v_mul_lo_u32 v3, s5, v1 791; GFX10-NEXT: s_ashr_i32 s5, s16, 31 792; GFX10-NEXT: s_add_i32 s0, s16, s5 793; GFX10-NEXT: s_xor_b32 s1, s5, s1 794; GFX10-NEXT: s_xor_b32 s0, s0, s5 795; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2 796; GFX10-NEXT: v_mul_hi_u32 v3, v1, v3 797; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2 798; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3 799; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0 800; GFX10-NEXT: v_mul_hi_u32 v1, s7, v1 801; GFX10-NEXT: v_mul_lo_u32 v2, v0, s4 802; GFX10-NEXT: v_mul_lo_u32 v3, v1, s3 803; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v0 804; GFX10-NEXT: 
v_add_nc_u32_e32 v5, 1, v1 805; GFX10-NEXT: v_sub_nc_u32_e32 v2, s0, v2 806; GFX10-NEXT: v_sub_nc_u32_e32 v3, s7, v3 807; GFX10-NEXT: v_subrev_nc_u32_e32 v6, s4, v2 808; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s3, v3 809; GFX10-NEXT: v_cmp_le_u32_e64 s0, s4, v2 810; GFX10-NEXT: v_subrev_nc_u32_e32 v7, s3, v3 811; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 812; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0 813; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v6, s0 814; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo 815; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v1 816; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v0 817; GFX10-NEXT: v_cmp_le_u32_e64 s0, s4, v2 818; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s3, v3 819; GFX10-NEXT: v_subrev_nc_u32_e32 v6, s4, v2 820; GFX10-NEXT: v_subrev_nc_u32_e32 v7, s3, v3 821; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0 822; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 823; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v6, s0 824; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo 825; GFX10-NEXT: s_xor_b32 s0, s6, s2 826; GFX10-NEXT: v_xor_b32_e32 v0, s1, v0 827; GFX10-NEXT: v_xor_b32_e32 v1, s0, v1 828; GFX10-NEXT: v_xor_b32_e32 v2, s5, v2 829; GFX10-NEXT: v_xor_b32_e32 v3, s6, v3 830; GFX10-NEXT: v_mov_b32_e32 v4, 0 831; GFX10-NEXT: v_subrev_nc_u32_e32 v0, s1, v0 832; GFX10-NEXT: v_subrev_nc_u32_e32 v1, s0, v1 833; GFX10-NEXT: v_subrev_nc_u32_e32 v2, s5, v2 834; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s6, v3 835; GFX10-NEXT: global_store_dwordx2 v4, v[0:1], s[12:13] 836; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[14:15] 837; GFX10-NEXT: s_endpgm 838 %div = sdiv <2 x i32> %x, %y 839 store <2 x i32> %div, ptr addrspace(1) %out0 840 %rem = srem <2 x i32> %x, %y 841 store <2 x i32> %rem, ptr addrspace(1) %out1 842 ret void 843} 844 845define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <4 x i32> %x, <4 x i32> %y) { 846; GFX8-LABEL: sdivrem_v4i32: 847; GFX8: ; %bb.0: 848; GFX8-NEXT: s_load_dwordx8 s[12:19], s[8:9], 0x10 849; 
GFX8-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0 850; GFX8-NEXT: s_waitcnt lgkmcnt(0) 851; GFX8-NEXT: s_ashr_i32 s2, s16, 31 852; GFX8-NEXT: s_add_i32 s0, s16, s2 853; GFX8-NEXT: s_xor_b32 s3, s0, s2 854; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s3 855; GFX8-NEXT: s_sub_i32 s1, 0, s3 856; GFX8-NEXT: s_ashr_i32 s9, s17, 31 857; GFX8-NEXT: s_add_i32 s0, s17, s9 858; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 859; GFX8-NEXT: s_xor_b32 s10, s0, s9 860; GFX8-NEXT: v_cvt_f32_u32_e32 v2, s10 861; GFX8-NEXT: s_ashr_i32 s8, s12, 31 862; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 863; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 864; GFX8-NEXT: s_add_i32 s0, s12, s8 865; GFX8-NEXT: s_xor_b32 s0, s0, s8 866; GFX8-NEXT: v_rcp_iflag_f32_e32 v2, v2 867; GFX8-NEXT: v_mul_lo_u32 v1, s1, v0 868; GFX8-NEXT: s_sub_i32 s11, 0, s10 869; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 870; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 871; GFX8-NEXT: v_mul_hi_u32 v0, s0, v0 872; GFX8-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v2 873; GFX8-NEXT: v_cvt_u32_f32_e32 v1, v1 874; GFX8-NEXT: v_mul_lo_u32 v2, v0, s3 875; GFX8-NEXT: v_add_u32_e32 v3, vcc, 1, v0 876; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s0, v2 877; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s3, v2 878; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 879; GFX8-NEXT: v_subrev_u32_e64 v3, s[0:1], s3, v2 880; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 881; GFX8-NEXT: v_add_u32_e32 v3, vcc, 1, v0 882; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s3, v2 883; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 884; GFX8-NEXT: v_subrev_u32_e64 v3, s[0:1], s3, v2 885; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 886; GFX8-NEXT: v_mul_lo_u32 v3, s11, v1 887; GFX8-NEXT: s_xor_b32 s0, s8, s2 888; GFX8-NEXT: s_ashr_i32 s2, s13, 31 889; GFX8-NEXT: s_add_i32 s1, s13, s2 890; GFX8-NEXT: v_mul_hi_u32 v3, v1, v3 891; GFX8-NEXT: s_xor_b32 s1, s1, s2 892; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0 893; GFX8-NEXT: v_xor_b32_e32 v2, s8, v2 894; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3 895; GFX8-NEXT: v_mul_hi_u32 v1, s1, v1 896; 
GFX8-NEXT: s_ashr_i32 s3, s18, 31 897; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s0, v0 898; GFX8-NEXT: v_mul_lo_u32 v3, v1, s10 899; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s8, v2 900; GFX8-NEXT: s_add_i32 s0, s18, s3 901; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s1, v3 902; GFX8-NEXT: v_add_u32_e32 v3, vcc, 1, v1 903; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s10, v2 904; GFX8-NEXT: s_xor_b32 s8, s0, s3 905; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 906; GFX8-NEXT: v_cvt_f32_u32_e32 v3, s8 907; GFX8-NEXT: v_subrev_u32_e64 v5, s[0:1], s10, v2 908; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc 909; GFX8-NEXT: v_rcp_iflag_f32_e32 v3, v3 910; GFX8-NEXT: v_add_u32_e32 v5, vcc, 1, v1 911; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s10, v2 912; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 913; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3 914; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 915; GFX8-NEXT: v_subrev_u32_e64 v5, s[0:1], s10, v2 916; GFX8-NEXT: s_sub_i32 s0, 0, s8 917; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc 918; GFX8-NEXT: v_mul_lo_u32 v5, s0, v3 919; GFX8-NEXT: s_xor_b32 s0, s2, s9 920; GFX8-NEXT: s_ashr_i32 s9, s14, 31 921; GFX8-NEXT: s_add_i32 s1, s14, s9 922; GFX8-NEXT: v_mul_hi_u32 v5, v3, v5 923; GFX8-NEXT: s_xor_b32 s1, s1, s9 924; GFX8-NEXT: v_xor_b32_e32 v2, s2, v2 925; GFX8-NEXT: v_xor_b32_e32 v1, s0, v1 926; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v5 927; GFX8-NEXT: v_mul_hi_u32 v3, s1, v3 928; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, s2, v2 929; GFX8-NEXT: s_ashr_i32 s2, s19, 31 930; GFX8-NEXT: v_mul_lo_u32 v6, v3, s8 931; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, s0, v1 932; GFX8-NEXT: s_add_i32 s0, s19, s2 933; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s1, v6 934; GFX8-NEXT: v_add_u32_e32 v6, vcc, 1, v3 935; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s8, v2 936; GFX8-NEXT: s_xor_b32 s10, s0, s2 937; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc 938; GFX8-NEXT: v_cvt_f32_u32_e32 v6, s10 939; GFX8-NEXT: v_subrev_u32_e64 v7, s[0:1], s8, v2 940; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc 941; GFX8-NEXT: 
v_rcp_iflag_f32_e32 v6, v6 942; GFX8-NEXT: v_add_u32_e32 v7, vcc, 1, v3 943; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s8, v2 944; GFX8-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 945; GFX8-NEXT: v_cvt_u32_f32_e32 v6, v6 946; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc 947; GFX8-NEXT: v_subrev_u32_e64 v7, s[0:1], s8, v2 948; GFX8-NEXT: s_sub_i32 s0, 0, s10 949; GFX8-NEXT: v_cndmask_b32_e32 v7, v2, v7, vcc 950; GFX8-NEXT: v_mul_lo_u32 v2, s0, v6 951; GFX8-NEXT: s_xor_b32 s0, s9, s3 952; GFX8-NEXT: s_ashr_i32 s3, s15, 31 953; GFX8-NEXT: s_add_i32 s1, s15, s3 954; GFX8-NEXT: v_mul_hi_u32 v2, v6, v2 955; GFX8-NEXT: s_xor_b32 s1, s1, s3 956; GFX8-NEXT: v_xor_b32_e32 v3, s0, v3 957; GFX8-NEXT: v_add_u32_e32 v2, vcc, v6, v2 958; GFX8-NEXT: v_mul_hi_u32 v8, s1, v2 959; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s0, v3 960; GFX8-NEXT: v_xor_b32_e32 v3, s9, v7 961; GFX8-NEXT: v_mul_lo_u32 v7, v8, s10 962; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s9, v3 963; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s1, v7 964; GFX8-NEXT: v_add_u32_e32 v7, vcc, 1, v8 965; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s10, v3 966; GFX8-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc 967; GFX8-NEXT: v_subrev_u32_e64 v8, s[0:1], s10, v3 968; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc 969; GFX8-NEXT: v_add_u32_e32 v8, vcc, 1, v7 970; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s10, v3 971; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc 972; GFX8-NEXT: v_subrev_u32_e64 v8, s[0:1], s10, v3 973; GFX8-NEXT: v_cndmask_b32_e32 v8, v3, v8, vcc 974; GFX8-NEXT: s_xor_b32 s0, s3, s2 975; GFX8-NEXT: v_xor_b32_e32 v3, s0, v7 976; GFX8-NEXT: v_xor_b32_e32 v7, s3, v8 977; GFX8-NEXT: v_mov_b32_e32 v9, s5 978; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, s0, v3 979; GFX8-NEXT: v_mov_b32_e32 v8, s4 980; GFX8-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 981; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, s3, v7 982; GFX8-NEXT: v_mov_b32_e32 v0, s6 983; GFX8-NEXT: v_mov_b32_e32 v1, s7 984; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[4:7] 985; GFX8-NEXT: s_endpgm 986; 987; GFX9-LABEL: sdivrem_v4i32: 988; 
GFX9: ; %bb.0: 989; GFX9-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x10 990; GFX9-NEXT: s_waitcnt lgkmcnt(0) 991; GFX9-NEXT: s_ashr_i32 s12, s4, 31 992; GFX9-NEXT: s_add_i32 s4, s4, s12 993; GFX9-NEXT: s_xor_b32 s4, s4, s12 994; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s4 995; GFX9-NEXT: s_ashr_i32 s13, s5, 31 996; GFX9-NEXT: s_add_i32 s5, s5, s13 997; GFX9-NEXT: s_xor_b32 s5, s5, s13 998; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 999; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s5 1000; GFX9-NEXT: s_sub_i32 s15, 0, s4 1001; GFX9-NEXT: s_ashr_i32 s14, s0, 31 1002; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1003; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 1004; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 1005; GFX9-NEXT: s_add_i32 s0, s0, s14 1006; GFX9-NEXT: s_xor_b32 s0, s0, s14 1007; GFX9-NEXT: v_mul_lo_u32 v2, s15, v0 1008; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 1009; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 1010; GFX9-NEXT: s_sub_i32 s15, 0, s5 1011; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 1012; GFX9-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x0 1013; GFX9-NEXT: v_mul_lo_u32 v3, s15, v1 1014; GFX9-NEXT: s_ashr_i32 s15, s1, 31 1015; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 1016; GFX9-NEXT: v_mul_hi_u32 v0, s0, v0 1017; GFX9-NEXT: v_mul_hi_u32 v2, v1, v3 1018; GFX9-NEXT: s_add_i32 s1, s1, s15 1019; GFX9-NEXT: s_xor_b32 s1, s1, s15 1020; GFX9-NEXT: v_mul_lo_u32 v3, v0, s4 1021; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 1022; GFX9-NEXT: v_add_u32_e32 v2, 1, v0 1023; GFX9-NEXT: v_mul_hi_u32 v1, s1, v1 1024; GFX9-NEXT: v_sub_u32_e32 v3, s0, v3 1025; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s4, v3 1026; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1027; GFX9-NEXT: v_subrev_u32_e32 v2, s4, v3 1028; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc 1029; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 1030; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s4, v2 1031; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1032; GFX9-NEXT: v_subrev_u32_e32 v3, s4, v2 1033; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1034; GFX9-NEXT: v_mul_lo_u32 v3, v1, s5 1035; GFX9-NEXT: 
s_xor_b32 s0, s14, s12 1036; GFX9-NEXT: v_xor_b32_e32 v0, s0, v0 1037; GFX9-NEXT: v_subrev_u32_e32 v0, s0, v0 1038; GFX9-NEXT: v_xor_b32_e32 v2, s14, v2 1039; GFX9-NEXT: s_ashr_i32 s0, s6, 31 1040; GFX9-NEXT: v_subrev_u32_e32 v4, s14, v2 1041; GFX9-NEXT: v_sub_u32_e32 v2, s1, v3 1042; GFX9-NEXT: s_add_i32 s1, s6, s0 1043; GFX9-NEXT: s_xor_b32 s1, s1, s0 1044; GFX9-NEXT: v_cvt_f32_u32_e32 v3, s1 1045; GFX9-NEXT: v_add_u32_e32 v5, 1, v1 1046; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s5, v2 1047; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 1048; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3 1049; GFX9-NEXT: v_subrev_u32_e32 v5, s5, v2 1050; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc 1051; GFX9-NEXT: v_add_u32_e32 v5, 1, v1 1052; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1053; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 1054; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s5, v2 1055; GFX9-NEXT: s_sub_i32 s4, 0, s1 1056; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 1057; GFX9-NEXT: v_mul_lo_u32 v5, s4, v3 1058; GFX9-NEXT: s_xor_b32 s4, s15, s13 1059; GFX9-NEXT: v_xor_b32_e32 v1, s4, v1 1060; GFX9-NEXT: v_subrev_u32_e32 v1, s4, v1 1061; GFX9-NEXT: s_ashr_i32 s4, s7, 31 1062; GFX9-NEXT: s_add_i32 s6, s7, s4 1063; GFX9-NEXT: v_mul_hi_u32 v5, v3, v5 1064; GFX9-NEXT: s_xor_b32 s6, s6, s4 1065; GFX9-NEXT: v_cvt_f32_u32_e32 v7, s6 1066; GFX9-NEXT: v_subrev_u32_e32 v6, s5, v2 1067; GFX9-NEXT: s_ashr_i32 s5, s2, 31 1068; GFX9-NEXT: s_add_i32 s2, s2, s5 1069; GFX9-NEXT: s_xor_b32 s2, s2, s5 1070; GFX9-NEXT: v_add_u32_e32 v3, v3, v5 1071; GFX9-NEXT: v_mul_hi_u32 v3, s2, v3 1072; GFX9-NEXT: v_rcp_iflag_f32_e32 v7, v7 1073; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 1074; GFX9-NEXT: v_xor_b32_e32 v2, s15, v2 1075; GFX9-NEXT: v_mul_lo_u32 v6, v3, s1 1076; GFX9-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 1077; GFX9-NEXT: v_cvt_u32_f32_e32 v7, v7 1078; GFX9-NEXT: v_subrev_u32_e32 v5, s15, v2 1079; GFX9-NEXT: v_sub_u32_e32 v2, s2, v6 1080; GFX9-NEXT: s_sub_i32 s2, 0, s6 1081; GFX9-NEXT: v_mul_lo_u32 v8, s2, v7 1082; 
GFX9-NEXT: v_add_u32_e32 v6, 1, v3 1083; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s1, v2 1084; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc 1085; GFX9-NEXT: v_subrev_u32_e32 v6, s1, v2 1086; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 1087; GFX9-NEXT: v_mul_hi_u32 v8, v7, v8 1088; GFX9-NEXT: v_add_u32_e32 v6, 1, v3 1089; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s1, v2 1090; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc 1091; GFX9-NEXT: v_subrev_u32_e32 v6, s1, v2 1092; GFX9-NEXT: s_ashr_i32 s1, s3, 31 1093; GFX9-NEXT: s_add_i32 s2, s3, s1 1094; GFX9-NEXT: s_xor_b32 s2, s2, s1 1095; GFX9-NEXT: v_add_u32_e32 v7, v7, v8 1096; GFX9-NEXT: v_mul_hi_u32 v7, s2, v7 1097; GFX9-NEXT: s_xor_b32 s0, s5, s0 1098; GFX9-NEXT: v_cndmask_b32_e32 v6, v2, v6, vcc 1099; GFX9-NEXT: v_xor_b32_e32 v2, s0, v3 1100; GFX9-NEXT: v_mul_lo_u32 v3, v7, s6 1101; GFX9-NEXT: v_add_u32_e32 v8, 1, v7 1102; GFX9-NEXT: v_subrev_u32_e32 v2, s0, v2 1103; GFX9-NEXT: s_xor_b32 s0, s1, s4 1104; GFX9-NEXT: v_sub_u32_e32 v3, s2, v3 1105; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 1106; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc 1107; GFX9-NEXT: v_subrev_u32_e32 v8, s6, v3 1108; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc 1109; GFX9-NEXT: v_add_u32_e32 v8, 1, v7 1110; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 1111; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc 1112; GFX9-NEXT: v_subrev_u32_e32 v8, s6, v3 1113; GFX9-NEXT: v_cndmask_b32_e32 v8, v3, v8, vcc 1114; GFX9-NEXT: v_xor_b32_e32 v3, s0, v7 1115; GFX9-NEXT: v_xor_b32_e32 v6, s5, v6 1116; GFX9-NEXT: v_subrev_u32_e32 v3, s0, v3 1117; GFX9-NEXT: v_xor_b32_e32 v7, s1, v8 1118; GFX9-NEXT: v_mov_b32_e32 v8, 0 1119; GFX9-NEXT: v_subrev_u32_e32 v6, s5, v6 1120; GFX9-NEXT: v_subrev_u32_e32 v7, s1, v7 1121; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1122; GFX9-NEXT: global_store_dwordx4 v8, v[0:3], s[8:9] 1123; GFX9-NEXT: global_store_dwordx4 v8, v[4:7], s[10:11] 1124; GFX9-NEXT: s_endpgm 1125; 1126; GFX10-LABEL: sdivrem_v4i32: 1127; GFX10: ; %bb.0: 1128; GFX10-NEXT: s_load_dwordx8 s[0:7], 
s[8:9], 0x10 1129; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1130; GFX10-NEXT: s_ashr_i32 s10, s4, 31 1131; GFX10-NEXT: s_ashr_i32 s11, s5, 31 1132; GFX10-NEXT: s_ashr_i32 s12, s6, 31 1133; GFX10-NEXT: s_ashr_i32 s13, s7, 31 1134; GFX10-NEXT: s_add_i32 s4, s4, s10 1135; GFX10-NEXT: s_add_i32 s5, s5, s11 1136; GFX10-NEXT: s_add_i32 s6, s6, s12 1137; GFX10-NEXT: s_add_i32 s7, s7, s13 1138; GFX10-NEXT: s_xor_b32 s14, s4, s10 1139; GFX10-NEXT: s_xor_b32 s15, s5, s11 1140; GFX10-NEXT: s_xor_b32 s16, s6, s12 1141; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s14 1142; GFX10-NEXT: s_xor_b32 s17, s7, s13 1143; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s15 1144; GFX10-NEXT: v_cvt_f32_u32_e32 v2, s16 1145; GFX10-NEXT: v_cvt_f32_u32_e32 v3, s17 1146; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 1147; GFX10-NEXT: s_sub_i32 s4, 0, s14 1148; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 1149; GFX10-NEXT: v_rcp_iflag_f32_e32 v2, v2 1150; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3 1151; GFX10-NEXT: s_sub_i32 s5, 0, s15 1152; GFX10-NEXT: s_sub_i32 s6, 0, s16 1153; GFX10-NEXT: s_ashr_i32 s18, s0, 31 1154; GFX10-NEXT: s_ashr_i32 s19, s1, 31 1155; GFX10-NEXT: s_ashr_i32 s20, s2, 31 1156; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1157; GFX10-NEXT: s_ashr_i32 s21, s3, 31 1158; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 1159; GFX10-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 1160; GFX10-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1161; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 1162; GFX10-NEXT: s_add_i32 s0, s0, s18 1163; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 1164; GFX10-NEXT: v_cvt_u32_f32_e32 v2, v2 1165; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3 1166; GFX10-NEXT: v_mul_lo_u32 v4, s4, v0 1167; GFX10-NEXT: s_sub_i32 s4, 0, s17 1168; GFX10-NEXT: v_mul_lo_u32 v5, s5, v1 1169; GFX10-NEXT: v_mul_lo_u32 v6, s6, v2 1170; GFX10-NEXT: v_mul_lo_u32 v7, s4, v3 1171; GFX10-NEXT: s_add_i32 s1, s1, s19 1172; GFX10-NEXT: s_add_i32 s2, s2, s20 1173; GFX10-NEXT: s_add_i32 s3, s3, s21 1174; GFX10-NEXT: v_mul_hi_u32 v4, v0, v4 1175; GFX10-NEXT: s_xor_b32 s0, s0, 
s18 1176; GFX10-NEXT: v_mul_hi_u32 v5, v1, v5 1177; GFX10-NEXT: v_mul_hi_u32 v6, v2, v6 1178; GFX10-NEXT: v_mul_hi_u32 v7, v3, v7 1179; GFX10-NEXT: s_xor_b32 s1, s1, s19 1180; GFX10-NEXT: s_xor_b32 s2, s2, s20 1181; GFX10-NEXT: s_xor_b32 s3, s3, s21 1182; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v4 1183; GFX10-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0 1184; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v5 1185; GFX10-NEXT: v_add_nc_u32_e32 v2, v2, v6 1186; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v7 1187; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0 1188; GFX10-NEXT: s_xor_b32 s8, s18, s10 1189; GFX10-NEXT: v_mul_hi_u32 v1, s1, v1 1190; GFX10-NEXT: v_mul_hi_u32 v2, s2, v2 1191; GFX10-NEXT: v_mul_hi_u32 v3, s3, v3 1192; GFX10-NEXT: s_xor_b32 s9, s19, s11 1193; GFX10-NEXT: s_xor_b32 s10, s20, s12 1194; GFX10-NEXT: v_mul_lo_u32 v4, v0, s14 1195; GFX10-NEXT: v_add_nc_u32_e32 v8, 1, v0 1196; GFX10-NEXT: v_mul_lo_u32 v5, v1, s15 1197; GFX10-NEXT: v_mul_lo_u32 v6, v2, s16 1198; GFX10-NEXT: v_mul_lo_u32 v7, v3, s17 1199; GFX10-NEXT: v_add_nc_u32_e32 v9, 1, v1 1200; GFX10-NEXT: v_add_nc_u32_e32 v10, 1, v2 1201; GFX10-NEXT: v_add_nc_u32_e32 v11, 1, v3 1202; GFX10-NEXT: v_sub_nc_u32_e32 v4, s0, v4 1203; GFX10-NEXT: v_sub_nc_u32_e32 v5, s1, v5 1204; GFX10-NEXT: v_sub_nc_u32_e32 v6, s2, v6 1205; GFX10-NEXT: v_sub_nc_u32_e32 v7, s3, v7 1206; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s14, v4 1207; GFX10-NEXT: v_cmp_le_u32_e64 s0, s15, v5 1208; GFX10-NEXT: v_cmp_le_u32_e64 s1, s16, v6 1209; GFX10-NEXT: v_cmp_le_u32_e64 s2, s17, v7 1210; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 1211; GFX10-NEXT: v_subrev_nc_u32_e32 v8, s14, v4 1212; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v9, s0 1213; GFX10-NEXT: v_subrev_nc_u32_e32 v9, s15, v5 1214; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v10, s1 1215; GFX10-NEXT: v_subrev_nc_u32_e32 v10, s16, v6 1216; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v11, s2 1217; GFX10-NEXT: v_subrev_nc_u32_e32 v11, s17, v7 1218; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc_lo 1219; GFX10-NEXT: 
v_cndmask_b32_e64 v5, v5, v9, s0 1220; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v10, s1 1221; GFX10-NEXT: v_add_nc_u32_e32 v8, 1, v0 1222; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v11, s2 1223; GFX10-NEXT: v_add_nc_u32_e32 v9, 1, v1 1224; GFX10-NEXT: v_add_nc_u32_e32 v10, 1, v2 1225; GFX10-NEXT: v_add_nc_u32_e32 v11, 1, v3 1226; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s14, v4 1227; GFX10-NEXT: v_cmp_le_u32_e64 s0, s15, v5 1228; GFX10-NEXT: v_cmp_le_u32_e64 s1, s16, v6 1229; GFX10-NEXT: v_cmp_le_u32_e64 s2, s17, v7 1230; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 1231; GFX10-NEXT: v_subrev_nc_u32_e32 v8, s14, v4 1232; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v9, s0 1233; GFX10-NEXT: v_subrev_nc_u32_e32 v9, s15, v5 1234; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v10, s1 1235; GFX10-NEXT: v_subrev_nc_u32_e32 v10, s16, v6 1236; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v11, s2 1237; GFX10-NEXT: v_subrev_nc_u32_e32 v11, s17, v7 1238; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc_lo 1239; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v9, s0 1240; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v10, s1 1241; GFX10-NEXT: s_xor_b32 s0, s21, s13 1242; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v11, s2 1243; GFX10-NEXT: v_xor_b32_e32 v0, s8, v0 1244; GFX10-NEXT: v_xor_b32_e32 v1, s9, v1 1245; GFX10-NEXT: v_xor_b32_e32 v2, s10, v2 1246; GFX10-NEXT: v_xor_b32_e32 v3, s0, v3 1247; GFX10-NEXT: v_xor_b32_e32 v4, s18, v4 1248; GFX10-NEXT: v_xor_b32_e32 v5, s19, v5 1249; GFX10-NEXT: v_xor_b32_e32 v6, s20, v6 1250; GFX10-NEXT: v_xor_b32_e32 v7, s21, v7 1251; GFX10-NEXT: v_subrev_nc_u32_e32 v0, s8, v0 1252; GFX10-NEXT: v_subrev_nc_u32_e32 v1, s9, v1 1253; GFX10-NEXT: v_subrev_nc_u32_e32 v2, s10, v2 1254; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s0, v3 1255; GFX10-NEXT: v_mov_b32_e32 v8, 0 1256; GFX10-NEXT: v_subrev_nc_u32_e32 v4, s18, v4 1257; GFX10-NEXT: v_subrev_nc_u32_e32 v5, s19, v5 1258; GFX10-NEXT: v_subrev_nc_u32_e32 v6, s20, v6 1259; GFX10-NEXT: v_subrev_nc_u32_e32 v7, s21, v7 1260; GFX10-NEXT: s_waitcnt lgkmcnt(0) 
1261; GFX10-NEXT: global_store_dwordx4 v8, v[0:3], s[4:5] 1262; GFX10-NEXT: global_store_dwordx4 v8, v[4:7], s[6:7] 1263; GFX10-NEXT: s_endpgm 1264 %div = sdiv <4 x i32> %x, %y 1265 store <4 x i32> %div, ptr addrspace(1) %out0 1266 %rem = srem <4 x i32> %x, %y 1267 store <4 x i32> %rem, ptr addrspace(1) %out1 1268 ret void 1269} 1270 1271define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i64> %x, <2 x i64> %y) { 1272; GFX8-LABEL: sdivrem_v2i64: 1273; GFX8: ; %bb.0: 1274; GFX8-NEXT: s_load_dwordx8 s[12:19], s[8:9], 0x0 1275; GFX8-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x20 1276; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1277; GFX8-NEXT: s_ashr_i32 s4, s17, 31 1278; GFX8-NEXT: s_ashr_i32 s6, s1, 31 1279; GFX8-NEXT: s_add_u32 s10, s16, s4 1280; GFX8-NEXT: s_addc_u32 s11, s17, s4 1281; GFX8-NEXT: s_add_u32 s0, s0, s6 1282; GFX8-NEXT: s_mov_b32 s7, s6 1283; GFX8-NEXT: s_addc_u32 s1, s1, s6 1284; GFX8-NEXT: s_xor_b64 s[8:9], s[0:1], s[6:7] 1285; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s9 1286; GFX8-NEXT: v_cvt_f32_u32_e32 v1, s8 1287; GFX8-NEXT: s_mov_b32 s5, s4 1288; GFX8-NEXT: s_xor_b64 s[10:11], s[10:11], s[4:5] 1289; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 1290; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 1291; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 1292; GFX8-NEXT: s_sub_u32 s16, 0, s8 1293; GFX8-NEXT: s_subb_u32 s17, 0, s9 1294; GFX8-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 1295; GFX8-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 1296; GFX8-NEXT: v_trunc_f32_e32 v2, v1 1297; GFX8-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2 1298; GFX8-NEXT: v_add_f32_e32 v0, v1, v0 1299; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v0 1300; GFX8-NEXT: v_cvt_u32_f32_e32 v4, v2 1301; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s16, v3, 0 1302; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s16, v4, v[1:2] 1303; GFX8-NEXT: v_mul_hi_u32 v5, v3, v0 1304; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s17, v3, v[1:2] 1305; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 1306; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 1307; 
GFX8-NEXT: v_mul_lo_u32 v6, v3, v1 1308; GFX8-NEXT: v_mul_lo_u32 v7, v4, v1 1309; GFX8-NEXT: v_mul_hi_u32 v8, v3, v1 1310; GFX8-NEXT: v_mul_hi_u32 v1, v4, v1 1311; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 1312; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1313; GFX8-NEXT: v_add_u32_e32 v0, vcc, v7, v0 1314; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1315; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 1316; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1317; GFX8-NEXT: v_add_u32_e32 v2, vcc, v6, v2 1318; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v8 1319; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1320; GFX8-NEXT: v_add_u32_e32 v5, vcc, v7, v5 1321; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 1322; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1323; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 1324; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 1325; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v0 1326; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v4, v1, vcc 1327; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s16, v3, 0 1328; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s16, v4, v[1:2] 1329; GFX8-NEXT: v_mul_hi_u32 v6, v3, v0 1330; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s17, v3, v[1:2] 1331; GFX8-NEXT: v_mul_lo_u32 v2, v4, v0 1332; GFX8-NEXT: v_mul_hi_u32 v0, v4, v0 1333; GFX8-NEXT: v_mul_lo_u32 v5, v3, v1 1334; GFX8-NEXT: s_xor_b64 s[16:17], s[4:5], s[6:7] 1335; GFX8-NEXT: s_ashr_i32 s6, s19, 31 1336; GFX8-NEXT: s_mov_b32 s7, s6 1337; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 1338; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1339; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 1340; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1341; GFX8-NEXT: v_mul_lo_u32 v6, v4, v1 1342; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 1343; GFX8-NEXT: v_mul_hi_u32 v5, v3, v1 1344; GFX8-NEXT: v_add_u32_e32 v0, vcc, v6, v0 1345; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1346; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v5 1347; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1348; GFX8-NEXT: v_add_u32_e32 v5, vcc, v6, v5 1349; GFX8-NEXT: v_mul_hi_u32 v1, v4, 
v1 1350; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 1351; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1352; GFX8-NEXT: v_add_u32_e32 v2, vcc, v5, v2 1353; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 1354; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0 1355; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc 1356; GFX8-NEXT: v_mul_lo_u32 v2, s11, v0 1357; GFX8-NEXT: v_mul_lo_u32 v3, s10, v1 1358; GFX8-NEXT: v_mul_hi_u32 v4, s10, v0 1359; GFX8-NEXT: v_mul_hi_u32 v0, s11, v0 1360; GFX8-NEXT: v_mul_hi_u32 v5, s11, v1 1361; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 1362; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 1363; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 1364; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1365; GFX8-NEXT: v_mul_lo_u32 v4, s11, v1 1366; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 1367; GFX8-NEXT: v_mul_hi_u32 v3, s10, v1 1368; GFX8-NEXT: v_add_u32_e32 v0, vcc, v4, v0 1369; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 1370; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 1371; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 1372; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 1373; GFX8-NEXT: v_add_u32_e32 v4, vcc, v0, v2 1374; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v4, 0 1375; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1376; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 1377; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v2 1378; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v3, v[1:2] 1379; GFX8-NEXT: v_mov_b32_e32 v6, s11 1380; GFX8-NEXT: v_sub_u32_e32 v7, vcc, s10, v0 1381; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v4, v[1:2] 1382; GFX8-NEXT: v_mov_b32_e32 v5, s9 1383; GFX8-NEXT: s_ashr_i32 s10, s3, 31 1384; GFX8-NEXT: v_subb_u32_e64 v6, s[0:1], v6, v1, vcc 1385; GFX8-NEXT: v_sub_u32_e64 v0, s[0:1], s11, v1 1386; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v6 1387; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[0:1] 1388; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v7 1389; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc 1390; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[0:1] 1391; GFX8-NEXT: v_cmp_eq_u32_e64 
s[0:1], s9, v6 1392; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, s8, v7 1393; GFX8-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[0:1] 1394; GFX8-NEXT: v_subbrev_u32_e64 v9, s[0:1], 0, v0, vcc 1395; GFX8-NEXT: v_add_u32_e64 v1, s[0:1], 1, v4 1396; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v3, s[0:1] 1397; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v9 1398; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] 1399; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v8 1400; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] 1401; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v9 1402; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[0:1] 1403; GFX8-NEXT: v_add_u32_e64 v12, s[0:1], 1, v1 1404; GFX8-NEXT: v_addc_u32_e64 v13, s[0:1], 0, v10, s[0:1] 1405; GFX8-NEXT: s_add_u32 s0, s18, s6 1406; GFX8-NEXT: s_addc_u32 s1, s19, s6 1407; GFX8-NEXT: s_add_u32 s2, s2, s10 1408; GFX8-NEXT: s_mov_b32 s11, s10 1409; GFX8-NEXT: s_addc_u32 s3, s3, s10 1410; GFX8-NEXT: s_xor_b64 s[2:3], s[2:3], s[10:11] 1411; GFX8-NEXT: v_cvt_f32_u32_e32 v14, s3 1412; GFX8-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc 1413; GFX8-NEXT: v_cvt_f32_u32_e32 v5, s2 1414; GFX8-NEXT: v_subrev_u32_e32 v15, vcc, s8, v8 1415; GFX8-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v0, vcc 1416; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f800000, v14 1417; GFX8-NEXT: v_add_f32_e32 v0, v0, v5 1418; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 1419; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 1420; GFX8-NEXT: v_cndmask_b32_e32 v5, v1, v12, vcc 1421; GFX8-NEXT: s_xor_b64 s[8:9], s[0:1], s[6:7] 1422; GFX8-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 1423; GFX8-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 1424; GFX8-NEXT: v_trunc_f32_e32 v11, v1 1425; GFX8-NEXT: v_mul_f32_e32 v1, 0xcf800000, v11 1426; GFX8-NEXT: v_add_f32_e32 v0, v1, v0 1427; GFX8-NEXT: v_cvt_u32_f32_e32 v12, v0 1428; GFX8-NEXT: s_sub_u32 s5, 0, s2 1429; GFX8-NEXT: s_subb_u32 s20, 0, s3 1430; GFX8-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc 1431; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s5, v12, 0 1432; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v2 
1433; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[0:1] 1434; GFX8-NEXT: v_cvt_u32_f32_e32 v5, v11 1435; GFX8-NEXT: v_cndmask_b32_e64 v10, v3, v10, s[0:1] 1436; GFX8-NEXT: v_cndmask_b32_e32 v3, v8, v15, vcc 1437; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v3, s[0:1] 1438; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[18:19], s5, v5, v[1:2] 1439; GFX8-NEXT: v_mul_lo_u32 v3, v5, v0 1440; GFX8-NEXT: v_mad_u64_u32 v[1:2], s[18:19], s20, v12, v[1:2] 1441; GFX8-NEXT: v_cndmask_b32_e32 v2, v9, v16, vcc 1442; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v2, s[0:1] 1443; GFX8-NEXT: v_mul_lo_u32 v8, v12, v1 1444; GFX8-NEXT: v_mul_hi_u32 v2, v12, v0 1445; GFX8-NEXT: v_mul_hi_u32 v0, v5, v0 1446; GFX8-NEXT: v_xor_b32_e32 v9, s17, v10 1447; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v8 1448; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1449; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 1450; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1451; GFX8-NEXT: v_mul_lo_u32 v3, v5, v1 1452; GFX8-NEXT: v_add_u32_e32 v2, vcc, v8, v2 1453; GFX8-NEXT: v_mul_hi_u32 v8, v12, v1 1454; GFX8-NEXT: v_add_u32_e32 v0, vcc, v3, v0 1455; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 1456; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v8 1457; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1458; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v8 1459; GFX8-NEXT: v_mul_hi_u32 v1, v5, v1 1460; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 1461; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1462; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 1463; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 1464; GFX8-NEXT: v_add_u32_e32 v8, vcc, v12, v0 1465; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s5, v8, 0 1466; GFX8-NEXT: v_addc_u32_e32 v5, vcc, v5, v1, vcc 1467; GFX8-NEXT: v_xor_b32_e32 v1, s16, v4 1468; GFX8-NEXT: v_mov_b32_e32 v0, v3 1469; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s5, v5, v[0:1] 1470; GFX8-NEXT: v_mov_b32_e32 v10, s17 1471; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s16, v1 1472; GFX8-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s20, v8, v[3:4] 1473; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v9, 
v10, vcc 1474; GFX8-NEXT: v_xor_b32_e32 v4, s4, v7 1475; GFX8-NEXT: v_mul_lo_u32 v7, v5, v2 1476; GFX8-NEXT: v_mul_lo_u32 v9, v8, v3 1477; GFX8-NEXT: v_mul_hi_u32 v11, v8, v2 1478; GFX8-NEXT: v_mul_hi_u32 v2, v5, v2 1479; GFX8-NEXT: v_xor_b32_e32 v6, s4, v6 1480; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v9 1481; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1482; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v11 1483; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1484; GFX8-NEXT: v_mul_lo_u32 v11, v5, v3 1485; GFX8-NEXT: v_add_u32_e32 v7, vcc, v9, v7 1486; GFX8-NEXT: v_mul_hi_u32 v9, v8, v3 1487; GFX8-NEXT: v_add_u32_e32 v2, vcc, v11, v2 1488; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1489; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v9 1490; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1491; GFX8-NEXT: v_add_u32_e32 v9, vcc, v11, v9 1492; GFX8-NEXT: v_mul_hi_u32 v3, v5, v3 1493; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v7 1494; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1495; GFX8-NEXT: v_add_u32_e32 v7, vcc, v9, v7 1496; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v7 1497; GFX8-NEXT: v_add_u32_e32 v2, vcc, v8, v2 1498; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v5, v3, vcc 1499; GFX8-NEXT: v_mov_b32_e32 v10, s4 1500; GFX8-NEXT: v_mul_lo_u32 v7, s9, v2 1501; GFX8-NEXT: v_mul_lo_u32 v8, s8, v3 1502; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s4, v4 1503; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v6, v10, vcc 1504; GFX8-NEXT: v_mul_hi_u32 v6, s8, v2 1505; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v8 1506; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1507; GFX8-NEXT: v_add_u32_e32 v6, vcc, v7, v6 1508; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1509; GFX8-NEXT: v_mul_lo_u32 v7, s9, v3 1510; GFX8-NEXT: v_mul_hi_u32 v2, s9, v2 1511; GFX8-NEXT: v_add_u32_e32 v6, vcc, v8, v6 1512; GFX8-NEXT: v_mul_hi_u32 v8, s8, v3 1513; GFX8-NEXT: v_add_u32_e32 v2, vcc, v7, v2 1514; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1515; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v8 1516; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1517; GFX8-NEXT: 
v_add_u32_e32 v7, vcc, v7, v8 1518; GFX8-NEXT: v_add_u32_e32 v8, vcc, v2, v6 1519; GFX8-NEXT: v_mul_hi_u32 v9, s9, v3 1520; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s2, v8, 0 1521; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1522; GFX8-NEXT: v_add_u32_e32 v6, vcc, v7, v6 1523; GFX8-NEXT: v_add_u32_e32 v9, vcc, v9, v6 1524; GFX8-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s2, v9, v[3:4] 1525; GFX8-NEXT: v_mov_b32_e32 v10, s9 1526; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s8, v2 1527; GFX8-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s3, v8, v[6:7] 1528; GFX8-NEXT: v_mov_b32_e32 v3, s3 1529; GFX8-NEXT: v_subb_u32_e64 v7, s[0:1], v10, v6, vcc 1530; GFX8-NEXT: v_sub_u32_e64 v6, s[0:1], s9, v6 1531; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s3, v7 1532; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[0:1] 1533; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s2, v2 1534; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] 1535; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s3, v7 1536; GFX8-NEXT: v_subb_u32_e32 v6, vcc, v6, v3, vcc 1537; GFX8-NEXT: v_cndmask_b32_e64 v10, v10, v11, s[0:1] 1538; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, s2, v2 1539; GFX8-NEXT: v_subbrev_u32_e64 v12, s[0:1], 0, v6, vcc 1540; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s3, v12 1541; GFX8-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[0:1] 1542; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s2, v11 1543; GFX8-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[0:1] 1544; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s3, v12 1545; GFX8-NEXT: v_cndmask_b32_e64 v13, v13, v14, s[0:1] 1546; GFX8-NEXT: v_add_u32_e64 v14, s[0:1], 1, v8 1547; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v6, v3, vcc 1548; GFX8-NEXT: v_addc_u32_e64 v15, s[0:1], 0, v9, s[0:1] 1549; GFX8-NEXT: v_add_u32_e32 v6, vcc, 1, v14 1550; GFX8-NEXT: v_addc_u32_e32 v16, vcc, 0, v15, vcc 1551; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 1552; GFX8-NEXT: v_subrev_u32_e64 v13, s[0:1], s2, v11 1553; GFX8-NEXT: v_subbrev_u32_e64 v3, s[0:1], 0, v3, s[0:1] 1554; GFX8-NEXT: v_cndmask_b32_e32 v6, v14, v6, vcc 1555; GFX8-NEXT: v_cndmask_b32_e32 v14, v15, v16, vcc 1556; 
GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v10 1557; GFX8-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[0:1] 1558; GFX8-NEXT: v_cndmask_b32_e64 v8, v9, v14, s[0:1] 1559; GFX8-NEXT: v_cndmask_b32_e32 v9, v11, v13, vcc 1560; GFX8-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc 1561; GFX8-NEXT: v_cndmask_b32_e64 v9, v2, v9, s[0:1] 1562; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v3, s[0:1] 1563; GFX8-NEXT: s_xor_b64 s[0:1], s[6:7], s[10:11] 1564; GFX8-NEXT: v_xor_b32_e32 v2, s0, v6 1565; GFX8-NEXT: v_xor_b32_e32 v3, s1, v8 1566; GFX8-NEXT: v_mov_b32_e32 v6, s1 1567; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s0, v2 1568; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc 1569; GFX8-NEXT: v_xor_b32_e32 v6, s6, v9 1570; GFX8-NEXT: v_xor_b32_e32 v7, s6, v7 1571; GFX8-NEXT: v_mov_b32_e32 v8, s6 1572; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s6, v6 1573; GFX8-NEXT: v_subb_u32_e32 v7, vcc, v7, v8, vcc 1574; GFX8-NEXT: v_mov_b32_e32 v8, s12 1575; GFX8-NEXT: v_mov_b32_e32 v9, s13 1576; GFX8-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 1577; GFX8-NEXT: s_nop 0 1578; GFX8-NEXT: v_mov_b32_e32 v0, s14 1579; GFX8-NEXT: v_mov_b32_e32 v1, s15 1580; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[4:7] 1581; GFX8-NEXT: s_endpgm 1582; 1583; GFX9-LABEL: sdivrem_v2i64: 1584; GFX9: ; %bb.0: 1585; GFX9-NEXT: s_load_dwordx8 s[12:19], s[8:9], 0x0 1586; GFX9-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x20 1587; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1588; GFX9-NEXT: s_ashr_i32 s4, s17, 31 1589; GFX9-NEXT: s_ashr_i32 s6, s1, 31 1590; GFX9-NEXT: s_add_u32 s10, s16, s4 1591; GFX9-NEXT: s_addc_u32 s11, s17, s4 1592; GFX9-NEXT: s_add_u32 s0, s0, s6 1593; GFX9-NEXT: s_mov_b32 s7, s6 1594; GFX9-NEXT: s_addc_u32 s1, s1, s6 1595; GFX9-NEXT: s_xor_b64 s[8:9], s[0:1], s[6:7] 1596; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s9 1597; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s8 1598; GFX9-NEXT: s_mov_b32 s5, s4 1599; GFX9-NEXT: s_xor_b64 s[10:11], s[10:11], s[4:5] 1600; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 1601; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 1602; GFX9-NEXT: 
v_rcp_iflag_f32_e32 v0, v0 1603; GFX9-NEXT: s_sub_u32 s16, 0, s8 1604; GFX9-NEXT: s_subb_u32 s17, 0, s9 1605; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 1606; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 1607; GFX9-NEXT: v_trunc_f32_e32 v2, v1 1608; GFX9-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2 1609; GFX9-NEXT: v_add_f32_e32 v0, v1, v0 1610; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v0 1611; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v2 1612; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s16, v3, 0 1613; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s16, v4, v[1:2] 1614; GFX9-NEXT: v_mul_hi_u32 v5, v3, v0 1615; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s17, v3, v[1:2] 1616; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 1617; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 1618; GFX9-NEXT: v_mul_lo_u32 v6, v3, v1 1619; GFX9-NEXT: v_mul_lo_u32 v7, v4, v1 1620; GFX9-NEXT: v_mul_hi_u32 v8, v3, v1 1621; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 1622; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 1623; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1624; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v7, v0 1625; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1626; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 1627; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1628; GFX9-NEXT: v_add_u32_e32 v2, v6, v2 1629; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v8 1630; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1631; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 1632; GFX9-NEXT: v_add_u32_e32 v5, v7, v5 1633; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1634; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 1635; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v0 1636; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v1, vcc 1637; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s16, v3, 0 1638; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s16, v4, v[1:2] 1639; GFX9-NEXT: v_mul_hi_u32 v6, v3, v0 1640; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s17, v3, v[1:2] 1641; GFX9-NEXT: v_mul_lo_u32 v2, v4, v0 1642; GFX9-NEXT: v_mul_hi_u32 v0, v4, v0 1643; GFX9-NEXT: v_mul_lo_u32 v5, v3, v1 1644; GFX9-NEXT: s_xor_b64 
s[16:17], s[4:5], s[6:7] 1645; GFX9-NEXT: s_ashr_i32 s6, s19, 31 1646; GFX9-NEXT: s_mov_b32 s7, s6 1647; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 1648; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1649; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 1650; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1651; GFX9-NEXT: v_mul_lo_u32 v6, v4, v1 1652; GFX9-NEXT: v_add_u32_e32 v2, v5, v2 1653; GFX9-NEXT: v_mul_hi_u32 v5, v3, v1 1654; GFX9-NEXT: v_mul_hi_u32 v1, v4, v1 1655; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v0 1656; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1657; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5 1658; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1659; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 1660; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 1661; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1662; GFX9-NEXT: v_add3_u32 v1, v5, v2, v1 1663; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v3, v0 1664; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v4, v1, vcc 1665; GFX9-NEXT: v_mul_lo_u32 v2, s11, v0 1666; GFX9-NEXT: v_mul_lo_u32 v3, s10, v1 1667; GFX9-NEXT: v_mul_hi_u32 v4, s10, v0 1668; GFX9-NEXT: v_mul_hi_u32 v0, s11, v0 1669; GFX9-NEXT: v_mul_hi_u32 v6, s11, v1 1670; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 1671; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 1672; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 1673; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1674; GFX9-NEXT: v_mul_lo_u32 v4, s11, v1 1675; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 1676; GFX9-NEXT: v_mul_hi_u32 v3, s10, v1 1677; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v4, v0 1678; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 1679; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 1680; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 1681; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v0, v2 1682; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v5, 0 1683; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1684; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 1685; GFX9-NEXT: v_add3_u32 v3, v3, v2, v6 1686; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v3, v[1:2] 1687; 
GFX9-NEXT: v_mov_b32_e32 v6, s11 1688; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, s10, v0 1689; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v5, v[1:2] 1690; GFX9-NEXT: v_mov_b32_e32 v4, s9 1691; GFX9-NEXT: s_ashr_i32 s10, s3, 31 1692; GFX9-NEXT: v_subb_co_u32_e64 v6, s[0:1], v6, v1, vcc 1693; GFX9-NEXT: v_sub_u32_e32 v0, s11, v1 1694; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v6 1695; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[0:1] 1696; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v7 1697; GFX9-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v4, vcc 1698; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[0:1] 1699; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v6 1700; GFX9-NEXT: v_subrev_co_u32_e32 v9, vcc, s8, v7 1701; GFX9-NEXT: v_cndmask_b32_e64 v8, v1, v2, s[0:1] 1702; GFX9-NEXT: v_subbrev_co_u32_e64 v10, s[0:1], 0, v0, vcc 1703; GFX9-NEXT: v_add_co_u32_e64 v2, s[0:1], 1, v5 1704; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v3, s[0:1] 1705; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v10 1706; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[0:1] 1707; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v9 1708; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] 1709; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v10 1710; GFX9-NEXT: v_cndmask_b32_e64 v12, v1, v12, s[0:1] 1711; GFX9-NEXT: v_add_co_u32_e64 v13, s[0:1], 1, v2 1712; GFX9-NEXT: v_addc_co_u32_e64 v14, s[0:1], 0, v11, s[0:1] 1713; GFX9-NEXT: s_add_u32 s0, s18, s6 1714; GFX9-NEXT: s_addc_u32 s1, s19, s6 1715; GFX9-NEXT: s_add_u32 s2, s2, s10 1716; GFX9-NEXT: s_mov_b32 s11, s10 1717; GFX9-NEXT: s_addc_u32 s3, s3, s10 1718; GFX9-NEXT: s_xor_b64 s[2:3], s[2:3], s[10:11] 1719; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s3 1720; GFX9-NEXT: v_cvt_f32_u32_e32 v15, s2 1721; GFX9-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v4, vcc 1722; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 1723; GFX9-NEXT: v_add_f32_e32 v1, v1, v15 1724; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 1725; GFX9-NEXT: v_subrev_co_u32_e32 v4, vcc, s8, v9 1726; GFX9-NEXT: v_subbrev_co_u32_e32 v15, vcc, 0, v0, vcc 1727; GFX9-NEXT: 
v_mul_f32_e32 v0, 0x5f7ffffc, v1 1728; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 1729; GFX9-NEXT: v_trunc_f32_e32 v16, v1 1730; GFX9-NEXT: v_mul_f32_e32 v1, 0xcf800000, v16 1731; GFX9-NEXT: v_add_f32_e32 v0, v1, v0 1732; GFX9-NEXT: v_cvt_u32_f32_e32 v17, v0 1733; GFX9-NEXT: s_xor_b64 s[8:9], s[0:1], s[6:7] 1734; GFX9-NEXT: s_sub_u32 s5, 0, s2 1735; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 1736; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s5, v17, 0 1737; GFX9-NEXT: v_cndmask_b32_e32 v12, v2, v13, vcc 1738; GFX9-NEXT: v_cvt_u32_f32_e32 v13, v16 1739; GFX9-NEXT: s_subb_u32 s20, 0, s3 1740; GFX9-NEXT: v_cndmask_b32_e32 v11, v11, v14, vcc 1741; GFX9-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc 1742; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s5, v13, v[1:2] 1743; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v8 1744; GFX9-NEXT: v_cndmask_b32_e64 v8, v3, v11, s[0:1] 1745; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[18:19], s20, v17, v[1:2] 1746; GFX9-NEXT: v_mul_lo_u32 v2, v13, v0 1747; GFX9-NEXT: v_cndmask_b32_e32 v9, v10, v15, vcc 1748; GFX9-NEXT: v_mul_lo_u32 v3, v17, v1 1749; GFX9-NEXT: v_mul_hi_u32 v10, v17, v0 1750; GFX9-NEXT: v_mul_hi_u32 v0, v13, v0 1751; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[0:1] 1752; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 1753; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 1754; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v10 1755; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1756; GFX9-NEXT: v_mul_lo_u32 v10, v13, v1 1757; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 1758; GFX9-NEXT: v_mul_hi_u32 v3, v17, v1 1759; GFX9-NEXT: v_mul_hi_u32 v1, v13, v1 1760; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v10, v0 1761; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1762; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 1763; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 1764; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 1765; GFX9-NEXT: v_add_u32_e32 v3, v10, v3 1766; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1767; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, v17, v0 1768; GFX9-NEXT: v_add3_u32 v1, v3, 
v2, v1 1769; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[18:19], s5, v10, 0 1770; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, v13, v1, vcc 1771; GFX9-NEXT: v_mov_b32_e32 v0, v3 1772; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v4, s[0:1] 1773; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[0:1] 1774; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s5, v11, v[0:1] 1775; GFX9-NEXT: v_xor_b32_e32 v5, s16, v5 1776; GFX9-NEXT: v_xor_b32_e32 v8, s17, v8 1777; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s20, v10, v[0:1] 1778; GFX9-NEXT: v_mov_b32_e32 v9, s17 1779; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s16, v5 1780; GFX9-NEXT: v_xor_b32_e32 v4, s4, v7 1781; GFX9-NEXT: v_mul_lo_u32 v5, v11, v2 1782; GFX9-NEXT: v_mul_lo_u32 v7, v10, v3 1783; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v8, v9, vcc 1784; GFX9-NEXT: v_mul_hi_u32 v8, v10, v2 1785; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v7 1786; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1787; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v8 1788; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1789; GFX9-NEXT: v_mul_lo_u32 v8, v11, v3 1790; GFX9-NEXT: v_mul_hi_u32 v2, v11, v2 1791; GFX9-NEXT: v_add_u32_e32 v5, v7, v5 1792; GFX9-NEXT: v_mul_hi_u32 v7, v10, v3 1793; GFX9-NEXT: v_mul_hi_u32 v3, v11, v3 1794; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v8, v2 1795; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1796; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7 1797; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1798; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 1799; GFX9-NEXT: v_add_u32_e32 v7, v8, v7 1800; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1801; GFX9-NEXT: v_add3_u32 v3, v7, v5, v3 1802; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v10, v2 1803; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v11, v3, vcc 1804; GFX9-NEXT: v_mul_lo_u32 v5, s9, v2 1805; GFX9-NEXT: v_mul_lo_u32 v7, s8, v3 1806; GFX9-NEXT: v_mul_hi_u32 v9, s8, v2 1807; GFX9-NEXT: v_mul_hi_u32 v2, s9, v2 1808; GFX9-NEXT: v_mul_hi_u32 v12, s9, v3 1809; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v7 1810; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, 
vcc 1811; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v9 1812; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1813; GFX9-NEXT: v_mul_lo_u32 v9, s9, v3 1814; GFX9-NEXT: v_add_u32_e32 v5, v7, v5 1815; GFX9-NEXT: v_mul_hi_u32 v7, s8, v3 1816; GFX9-NEXT: v_xor_b32_e32 v6, s4, v6 1817; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v9, v2 1818; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1819; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7 1820; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1821; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, v2, v5 1822; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s2, v10, 0 1823; GFX9-NEXT: v_mov_b32_e32 v8, s4 1824; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1825; GFX9-NEXT: v_subrev_co_u32_e32 v4, vcc, s4, v4 1826; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v6, v8, vcc 1827; GFX9-NEXT: v_add_u32_e32 v6, v9, v7 1828; GFX9-NEXT: v_add3_u32 v8, v6, v11, v12 1829; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s2, v8, v[3:4] 1830; GFX9-NEXT: v_mov_b32_e32 v9, s9 1831; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, s8, v2 1832; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s3, v10, v[6:7] 1833; GFX9-NEXT: v_mov_b32_e32 v3, s3 1834; GFX9-NEXT: v_subb_co_u32_e64 v7, s[0:1], v9, v6, vcc 1835; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s3, v7 1836; GFX9-NEXT: v_sub_u32_e32 v6, s9, v6 1837; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[0:1] 1838; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s2, v2 1839; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] 1840; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s3, v7 1841; GFX9-NEXT: v_subb_co_u32_e32 v6, vcc, v6, v3, vcc 1842; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v11, s[0:1] 1843; GFX9-NEXT: v_subrev_co_u32_e32 v11, vcc, s2, v2 1844; GFX9-NEXT: v_subbrev_co_u32_e64 v12, s[0:1], 0, v6, vcc 1845; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s3, v12 1846; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[0:1] 1847; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s2, v11 1848; GFX9-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[0:1] 1849; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s3, v12 1850; GFX9-NEXT: v_cndmask_b32_e64 v13, v13, 
v14, s[0:1] 1851; GFX9-NEXT: v_add_co_u32_e64 v14, s[0:1], 1, v10 1852; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v6, v3, vcc 1853; GFX9-NEXT: v_addc_co_u32_e64 v15, s[0:1], 0, v8, s[0:1] 1854; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, 1, v14 1855; GFX9-NEXT: v_addc_co_u32_e32 v16, vcc, 0, v15, vcc 1856; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 1857; GFX9-NEXT: v_cndmask_b32_e32 v6, v14, v6, vcc 1858; GFX9-NEXT: v_cndmask_b32_e32 v14, v15, v16, vcc 1859; GFX9-NEXT: v_subrev_co_u32_e64 v15, s[0:1], s2, v11 1860; GFX9-NEXT: v_subbrev_co_u32_e64 v3, s[0:1], 0, v3, s[0:1] 1861; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v9 1862; GFX9-NEXT: v_cndmask_b32_e32 v9, v11, v15, vcc 1863; GFX9-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc 1864; GFX9-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[0:1] 1865; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, v14, s[0:1] 1866; GFX9-NEXT: v_cndmask_b32_e64 v9, v2, v9, s[0:1] 1867; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v3, s[0:1] 1868; GFX9-NEXT: s_xor_b64 s[0:1], s[6:7], s[10:11] 1869; GFX9-NEXT: v_xor_b32_e32 v2, s0, v6 1870; GFX9-NEXT: v_xor_b32_e32 v3, s1, v8 1871; GFX9-NEXT: v_mov_b32_e32 v6, s1 1872; GFX9-NEXT: v_subrev_co_u32_e32 v2, vcc, s0, v2 1873; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v6, vcc 1874; GFX9-NEXT: v_xor_b32_e32 v6, s6, v9 1875; GFX9-NEXT: v_mov_b32_e32 v13, 0 1876; GFX9-NEXT: v_xor_b32_e32 v7, s6, v7 1877; GFX9-NEXT: v_mov_b32_e32 v8, s6 1878; GFX9-NEXT: v_subrev_co_u32_e32 v6, vcc, s6, v6 1879; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v8, vcc 1880; GFX9-NEXT: global_store_dwordx4 v13, v[0:3], s[12:13] 1881; GFX9-NEXT: global_store_dwordx4 v13, v[4:7], s[14:15] 1882; GFX9-NEXT: s_endpgm 1883; 1884; GFX10-LABEL: sdivrem_v2i64: 1885; GFX10: ; %bb.0: 1886; GFX10-NEXT: s_clause 0x1 1887; GFX10-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x20 1888; GFX10-NEXT: s_load_dwordx8 s[12:19], s[8:9], 0x0 1889; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1890; GFX10-NEXT: s_ashr_i32 s8, s1, 31 1891; GFX10-NEXT: s_ashr_i32 s4, s17, 31 1892; GFX10-NEXT: s_mov_b32 s9, s8 1893; 
GFX10-NEXT: s_add_u32 s10, s16, s4 1894; GFX10-NEXT: s_addc_u32 s11, s17, s4 1895; GFX10-NEXT: s_add_u32 s0, s0, s8 1896; GFX10-NEXT: s_addc_u32 s1, s1, s8 1897; GFX10-NEXT: s_mov_b32 s5, s4 1898; GFX10-NEXT: s_xor_b64 s[6:7], s[0:1], s[8:9] 1899; GFX10-NEXT: s_xor_b64 s[0:1], s[10:11], s[4:5] 1900; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s7 1901; GFX10-NEXT: s_sub_u32 s21, 0, s6 1902; GFX10-NEXT: s_subb_u32 s20, 0, s7 1903; GFX10-NEXT: s_xor_b64 s[16:17], s[4:5], s[8:9] 1904; GFX10-NEXT: s_ashr_i32 s8, s19, 31 1905; GFX10-NEXT: s_ashr_i32 s10, s3, 31 1906; GFX10-NEXT: s_add_u32 s18, s18, s8 1907; GFX10-NEXT: s_addc_u32 s19, s19, s8 1908; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s6 1909; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 1910; GFX10-NEXT: s_add_u32 s2, s2, s10 1911; GFX10-NEXT: s_mov_b32 s11, s10 1912; GFX10-NEXT: s_addc_u32 s3, s3, s10 1913; GFX10-NEXT: s_mov_b32 s9, s8 1914; GFX10-NEXT: s_xor_b64 s[2:3], s[2:3], s[10:11] 1915; GFX10-NEXT: v_add_f32_e32 v0, v1, v0 1916; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s3 1917; GFX10-NEXT: v_cvt_f32_u32_e32 v2, s2 1918; GFX10-NEXT: s_xor_b64 s[18:19], s[18:19], s[8:9] 1919; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 1920; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 1921; GFX10-NEXT: v_add_f32_e32 v1, v1, v2 1922; GFX10-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 1923; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 1924; GFX10-NEXT: v_mul_f32_e32 v2, 0x2f800000, v0 1925; GFX10-NEXT: v_trunc_f32_e32 v2, v2 1926; GFX10-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v1 1927; GFX10-NEXT: v_mul_f32_e32 v1, 0xcf800000, v2 1928; GFX10-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 1929; GFX10-NEXT: v_cvt_u32_f32_e32 v9, v2 1930; GFX10-NEXT: v_add_f32_e32 v0, v1, v0 1931; GFX10-NEXT: v_trunc_f32_e32 v6, v4 1932; GFX10-NEXT: v_cvt_u32_f32_e32 v7, v0 1933; GFX10-NEXT: v_mul_f32_e32 v4, 0xcf800000, v6 1934; GFX10-NEXT: v_mad_u64_u32 v[0:1], s5, s21, v7, 0 1935; GFX10-NEXT: v_add_f32_e32 v3, v4, v3 1936; GFX10-NEXT: s_sub_u32 s5, 0, s2 1937; GFX10-NEXT: v_cvt_u32_f32_e32 v8, v3 
1938; GFX10-NEXT: v_mul_hi_u32 v10, v9, v0 1939; GFX10-NEXT: v_mad_u64_u32 v[2:3], s22, s5, v8, 0 1940; GFX10-NEXT: v_mad_u64_u32 v[4:5], s22, s21, v9, v[1:2] 1941; GFX10-NEXT: v_cvt_u32_f32_e32 v5, v6 1942; GFX10-NEXT: v_mov_b32_e32 v1, v3 1943; GFX10-NEXT: v_mul_hi_u32 v6, v7, v0 1944; GFX10-NEXT: s_subb_u32 s22, 0, s3 1945; GFX10-NEXT: v_mul_hi_u32 v12, v8, v2 1946; GFX10-NEXT: v_mul_lo_u32 v11, v5, v2 1947; GFX10-NEXT: v_mad_u64_u32 v[3:4], s23, s20, v7, v[4:5] 1948; GFX10-NEXT: v_mul_lo_u32 v4, v9, v0 1949; GFX10-NEXT: v_mad_u64_u32 v[0:1], s23, s5, v5, v[1:2] 1950; GFX10-NEXT: v_mul_hi_u32 v2, v5, v2 1951; GFX10-NEXT: v_mul_lo_u32 v13, v7, v3 1952; GFX10-NEXT: v_mul_lo_u32 v14, v9, v3 1953; GFX10-NEXT: v_mul_hi_u32 v15, v7, v3 1954; GFX10-NEXT: v_mad_u64_u32 v[0:1], s23, s22, v8, v[0:1] 1955; GFX10-NEXT: v_mul_hi_u32 v1, v9, v3 1956; GFX10-NEXT: v_add_co_u32 v3, s23, v4, v13 1957; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s23 1958; GFX10-NEXT: v_add_co_u32 v10, s23, v14, v10 1959; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s23 1960; GFX10-NEXT: v_mul_lo_u32 v14, v8, v0 1961; GFX10-NEXT: v_add_co_u32 v3, s23, v3, v6 1962; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s23 1963; GFX10-NEXT: v_add_co_u32 v6, s23, v10, v15 1964; GFX10-NEXT: v_mul_lo_u32 v15, v5, v0 1965; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s23 1966; GFX10-NEXT: v_mul_hi_u32 v16, v8, v0 1967; GFX10-NEXT: v_mul_hi_u32 v17, v5, v0 1968; GFX10-NEXT: v_add_nc_u32_e32 v0, v4, v3 1969; GFX10-NEXT: v_add_co_u32 v4, s23, v11, v14 1970; GFX10-NEXT: v_add_nc_u32_e32 v3, v13, v10 1971; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s23 1972; GFX10-NEXT: v_add_co_u32 v2, s23, v15, v2 1973; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s23 1974; GFX10-NEXT: v_add_co_u32 v0, s23, v6, v0 1975; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s23 1976; GFX10-NEXT: v_add_co_u32 v4, s23, v4, v12 1977; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s23 1978; GFX10-NEXT: v_add_co_u32 v2, s23, v2, v16 1979; GFX10-NEXT: v_add3_u32 v1, v3, v6, v1 1980; 
GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v7, v0 1981; GFX10-NEXT: v_add_nc_u32_e32 v3, v10, v4 1982; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s23 1983; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v9, v1, vcc_lo 1984; GFX10-NEXT: v_mad_u64_u32 v[0:1], s23, s21, v6, 0 1985; GFX10-NEXT: v_add_co_u32 v2, s23, v2, v3 1986; GFX10-NEXT: v_add_nc_u32_e32 v4, v11, v12 1987; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s23 1988; GFX10-NEXT: v_mov_b32_e32 v10, 0 1989; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v8, v2 1990; GFX10-NEXT: v_mul_hi_u32 v11, v7, v0 1991; GFX10-NEXT: v_add3_u32 v3, v4, v3, v17 1992; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v5, v3, vcc_lo 1993; GFX10-NEXT: v_mad_u64_u32 v[2:3], s23, s5, v8, 0 1994; GFX10-NEXT: v_mad_u64_u32 v[4:5], s21, s21, v7, v[1:2] 1995; GFX10-NEXT: v_mov_b32_e32 v1, v3 1996; GFX10-NEXT: v_mul_lo_u32 v12, v9, v2 1997; GFX10-NEXT: v_mul_hi_u32 v13, v8, v2 1998; GFX10-NEXT: v_mad_u64_u32 v[3:4], s20, s20, v6, v[4:5] 1999; GFX10-NEXT: v_mul_lo_u32 v4, v7, v0 2000; GFX10-NEXT: v_mul_hi_u32 v5, v6, v0 2001; GFX10-NEXT: v_mad_u64_u32 v[0:1], s5, s5, v9, v[1:2] 2002; GFX10-NEXT: v_mul_hi_u32 v2, v9, v2 2003; GFX10-NEXT: v_mul_lo_u32 v14, v6, v3 2004; GFX10-NEXT: v_mul_lo_u32 v15, v7, v3 2005; GFX10-NEXT: v_mul_hi_u32 v16, v6, v3 2006; GFX10-NEXT: v_mad_u64_u32 v[0:1], s5, s22, v8, v[0:1] 2007; GFX10-NEXT: v_mul_hi_u32 v1, v7, v3 2008; GFX10-NEXT: v_add_co_u32 v3, s5, v4, v14 2009; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s5 2010; GFX10-NEXT: v_add_co_u32 v11, s5, v15, v11 2011; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, 1, s5 2012; GFX10-NEXT: v_add_co_u32 v3, s5, v3, v5 2013; GFX10-NEXT: v_mul_lo_u32 v15, v8, v0 2014; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s5 2015; GFX10-NEXT: v_add_co_u32 v5, s5, v11, v16 2016; GFX10-NEXT: v_mul_lo_u32 v16, v9, v0 2017; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s5 2018; GFX10-NEXT: v_add_nc_u32_e32 v3, v4, v3 2019; GFX10-NEXT: v_mul_hi_u32 v17, v8, v0 2020; GFX10-NEXT: v_mul_hi_u32 v0, v9, v0 2021; GFX10-NEXT: 
v_add_nc_u32_e32 v4, v14, v11 2022; GFX10-NEXT: v_add_co_u32 v11, s5, v12, v15 2023; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s5 2024; GFX10-NEXT: v_add_co_u32 v2, s5, v16, v2 2025; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, 1, s5 2026; GFX10-NEXT: v_add_co_u32 v3, s5, v5, v3 2027; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s5 2028; GFX10-NEXT: v_add_co_u32 v11, s5, v11, v13 2029; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s5 2030; GFX10-NEXT: v_add_co_u32 v2, s5, v2, v17 2031; GFX10-NEXT: v_add3_u32 v1, v4, v5, v1 2032; GFX10-NEXT: v_add_co_u32 v3, vcc_lo, v6, v3 2033; GFX10-NEXT: v_add_nc_u32_e32 v4, v12, v11 2034; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s5 2035; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v7, v1, vcc_lo 2036; GFX10-NEXT: v_mul_lo_u32 v6, s1, v3 2037; GFX10-NEXT: v_add_co_u32 v2, s5, v2, v4 2038; GFX10-NEXT: v_add_nc_u32_e32 v5, v14, v13 2039; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s5 2040; GFX10-NEXT: v_mul_lo_u32 v11, s0, v1 2041; GFX10-NEXT: v_mul_hi_u32 v7, s0, v3 2042; GFX10-NEXT: v_mul_hi_u32 v3, s1, v3 2043; GFX10-NEXT: v_mul_lo_u32 v12, s1, v1 2044; GFX10-NEXT: v_add3_u32 v0, v5, v4, v0 2045; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v8, v2 2046; GFX10-NEXT: v_mul_hi_u32 v4, s0, v1 2047; GFX10-NEXT: v_mul_hi_u32 v5, s1, v1 2048; GFX10-NEXT: v_add_co_u32 v1, s5, v6, v11 2049; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v9, v0, vcc_lo 2050; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s5 2051; GFX10-NEXT: v_add_co_u32 v3, s5, v12, v3 2052; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s5 2053; GFX10-NEXT: v_add_co_u32 v1, s5, v1, v7 2054; GFX10-NEXT: v_mul_lo_u32 v0, s19, v2 2055; GFX10-NEXT: v_mul_lo_u32 v12, s18, v8 2056; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s5 2057; GFX10-NEXT: v_add_co_u32 v3, s5, v3, v4 2058; GFX10-NEXT: v_mul_hi_u32 v9, s18, v2 2059; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s5 2060; GFX10-NEXT: v_mul_hi_u32 v2, s19, v2 2061; GFX10-NEXT: v_mul_lo_u32 v7, s19, v8 2062; GFX10-NEXT: v_add_nc_u32_e32 v1, v6, v1 2063; GFX10-NEXT: v_add_co_u32 
v6, s5, v0, v12 2064; GFX10-NEXT: v_mul_hi_u32 v13, s18, v8 2065; GFX10-NEXT: v_add_nc_u32_e32 v4, v11, v4 2066; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s5 2067; GFX10-NEXT: v_add_co_u32 v12, s5, v3, v1 2068; GFX10-NEXT: v_add_co_u32 v2, s20, v7, v2 2069; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s5 2070; GFX10-NEXT: v_mad_u64_u32 v[0:1], s5, s6, v12, 0 2071; GFX10-NEXT: v_add_co_u32 v6, s5, v6, v9 2072; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s5 2073; GFX10-NEXT: v_add_co_u32 v9, s5, v2, v13 2074; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s20 2075; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s5 2076; GFX10-NEXT: v_add3_u32 v4, v4, v7, v5 2077; GFX10-NEXT: v_add_nc_u32_e32 v6, v11, v6 2078; GFX10-NEXT: v_mul_hi_u32 v5, s19, v8 2079; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v12, 1 2080; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v2 2081; GFX10-NEXT: v_mad_u64_u32 v[1:2], s5, s6, v4, v[1:2] 2082; GFX10-NEXT: v_add_co_u32 v6, s5, v9, v6 2083; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s5 2084; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v4, vcc_lo 2085; GFX10-NEXT: v_add_co_u32 v11, vcc_lo, v7, 1 2086; GFX10-NEXT: v_mad_u64_u32 v[1:2], s5, s7, v12, v[1:2] 2087; GFX10-NEXT: v_add3_u32 v5, v3, v9, v5 2088; GFX10-NEXT: v_mad_u64_u32 v[2:3], s5, s2, v6, 0 2089; GFX10-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, 0, v8, vcc_lo 2090; GFX10-NEXT: v_sub_co_u32 v14, vcc_lo, s0, v0 2091; GFX10-NEXT: v_sub_nc_u32_e32 v9, s1, v1 2092; GFX10-NEXT: v_sub_co_ci_u32_e64 v15, s0, s1, v1, vcc_lo 2093; GFX10-NEXT: v_mov_b32_e32 v0, v3 2094; GFX10-NEXT: v_subrev_co_ci_u32_e32 v9, vcc_lo, s7, v9, vcc_lo 2095; GFX10-NEXT: v_sub_co_u32 v3, vcc_lo, v14, s6 2096; GFX10-NEXT: v_subrev_co_ci_u32_e64 v16, s0, 0, v9, vcc_lo 2097; GFX10-NEXT: v_cmp_le_u32_e64 s0, s6, v14 2098; GFX10-NEXT: v_subrev_co_ci_u32_e32 v9, vcc_lo, s7, v9, vcc_lo 2099; GFX10-NEXT: v_cndmask_b32_e64 v17, 0, -1, s0 2100; GFX10-NEXT: v_cmp_le_u32_e64 s0, s6, v3 2101; GFX10-NEXT: v_cndmask_b32_e64 v18, 0, -1, s0 2102; GFX10-NEXT: v_cmp_le_u32_e64 s0, 
s7, v16 2103; GFX10-NEXT: v_cndmask_b32_e64 v19, 0, -1, s0 2104; GFX10-NEXT: v_cmp_le_u32_e64 s0, s7, v15 2105; GFX10-NEXT: v_cndmask_b32_e64 v20, 0, -1, s0 2106; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s2, v5, v[0:1] 2107; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s7, v16 2108; GFX10-NEXT: v_cndmask_b32_e64 v1, v19, v18, s0 2109; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s7, v15 2110; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 2111; GFX10-NEXT: v_cndmask_b32_e64 v17, v20, v17, s0 2112; GFX10-NEXT: v_sub_co_u32 v1, s0, v3, s6 2113; GFX10-NEXT: v_subrev_co_ci_u32_e64 v9, s0, 0, v9, s0 2114; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc_lo 2115; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc_lo 2116; GFX10-NEXT: v_mad_u64_u32 v[0:1], s1, s3, v6, v[0:1] 2117; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v17 2118; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v13, vcc_lo 2119; GFX10-NEXT: v_cndmask_b32_e64 v1, v12, v7, s0 2120; GFX10-NEXT: v_cndmask_b32_e32 v7, v16, v9, vcc_lo 2121; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, s18, v2 2122; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v8, s0 2123; GFX10-NEXT: v_sub_co_ci_u32_e64 v8, s1, s19, v0, vcc_lo 2124; GFX10-NEXT: v_sub_nc_u32_e32 v0, s19, v0 2125; GFX10-NEXT: v_cndmask_b32_e64 v3, v14, v3, s0 2126; GFX10-NEXT: v_cndmask_b32_e64 v7, v15, v7, s0 2127; GFX10-NEXT: v_cmp_le_u32_e64 s0, s3, v8 2128; GFX10-NEXT: v_xor_b32_e32 v1, s16, v1 2129; GFX10-NEXT: v_subrev_co_ci_u32_e32 v11, vcc_lo, s3, v0, vcc_lo 2130; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s2, v2 2131; GFX10-NEXT: v_xor_b32_e32 v4, s17, v4 2132; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, -1, s0 2133; GFX10-NEXT: v_xor_b32_e32 v3, s4, v3 2134; GFX10-NEXT: v_xor_b32_e32 v7, s4, v7 2135; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc_lo 2136; GFX10-NEXT: v_sub_co_u32 v13, vcc_lo, v2, s2 2137; GFX10-NEXT: v_subrev_co_ci_u32_e64 v14, s0, 0, v11, vcc_lo 2138; GFX10-NEXT: v_sub_co_u32 v0, s0, v1, s16 2139; GFX10-NEXT: v_subrev_co_ci_u32_e64 v1, s0, s17, v4, s0 2140; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s3, v8 2141; GFX10-NEXT: 
v_subrev_co_ci_u32_e32 v11, vcc_lo, s3, v11, vcc_lo 2142; GFX10-NEXT: v_cndmask_b32_e64 v4, v9, v12, s0 2143; GFX10-NEXT: v_cmp_le_u32_e64 s0, s3, v14 2144; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, -1, s0 2145; GFX10-NEXT: v_cmp_le_u32_e64 s0, s2, v13 2146; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, -1, s0 2147; GFX10-NEXT: v_add_co_u32 v15, s0, v6, 1 2148; GFX10-NEXT: v_add_co_ci_u32_e64 v16, s0, 0, v5, s0 2149; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s3, v14 2150; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v12, s0 2151; GFX10-NEXT: v_add_co_u32 v12, s0, v15, 1 2152; GFX10-NEXT: v_add_co_ci_u32_e64 v17, s0, 0, v16, s0 2153; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9 2154; GFX10-NEXT: v_sub_co_u32 v9, s0, v13, s2 2155; GFX10-NEXT: v_subrev_co_ci_u32_e64 v11, s0, 0, v11, s0 2156; GFX10-NEXT: v_cndmask_b32_e32 v12, v15, v12, vcc_lo 2157; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v4 2158; GFX10-NEXT: v_cndmask_b32_e32 v15, v16, v17, vcc_lo 2159; GFX10-NEXT: v_cndmask_b32_e32 v4, v13, v9, vcc_lo 2160; GFX10-NEXT: v_cndmask_b32_e32 v9, v14, v11, vcc_lo 2161; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v12, s0 2162; GFX10-NEXT: v_cndmask_b32_e64 v11, v5, v15, s0 2163; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v4, s0 2164; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v9, s0 2165; GFX10-NEXT: s_xor_b64 s[0:1], s[8:9], s[10:11] 2166; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v3, s4 2167; GFX10-NEXT: v_xor_b32_e32 v3, s0, v6 2168; GFX10-NEXT: v_xor_b32_e32 v6, s1, v11 2169; GFX10-NEXT: v_subrev_co_ci_u32_e32 v5, vcc_lo, s4, v7, vcc_lo 2170; GFX10-NEXT: v_xor_b32_e32 v7, s8, v2 2171; GFX10-NEXT: v_xor_b32_e32 v8, s8, v8 2172; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v3, s0 2173; GFX10-NEXT: v_subrev_co_ci_u32_e32 v3, vcc_lo, s1, v6, vcc_lo 2174; GFX10-NEXT: v_sub_co_u32 v6, vcc_lo, v7, s8 2175; GFX10-NEXT: v_subrev_co_ci_u32_e32 v7, vcc_lo, s8, v8, vcc_lo 2176; GFX10-NEXT: global_store_dwordx4 v10, v[0:3], s[12:13] 2177; GFX10-NEXT: global_store_dwordx4 v10, v[4:7], s[14:15] 2178; GFX10-NEXT: s_endpgm 2179 %div = sdiv <2 x i64> 
%x, %y
  store <2 x i64> %div, ptr addrspace(1) %out0
  %rem = srem <2 x i64> %x, %y
  store <2 x i64> %rem, ptr addrspace(1) %out1
  ret void
}

; Scalar i8 case. Despite the "sdiv_" name, the body computes both the signed
; quotient and the signed remainder of the same %x / %y pair: the quotient is
; stored to %out0 and the remainder to %out1.
; In the expected output for every prefix both operands come from one kernarg
; dword (offset 0x10) and are sign-extended to 32 bits (s_sext_i32_i8 for the
; low byte, s_bfe_i32 ... 0x80008 for the next byte); a single reciprocal-based
; 32-bit expansion (one v_rcp_iflag_f32) then feeds both stored results.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define amdgpu_kernel void @sdiv_i8(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i8 %x, i8 %y) {
; GFX8-LABEL: sdiv_i8:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_load_dword s4, s[8:9], 0x10
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_bfe_i32 s0, s4, 0x80008
; GFX8-NEXT:    s_ashr_i32 s5, s0, 31
; GFX8-NEXT:    s_add_i32 s0, s0, s5
; GFX8-NEXT:    s_xor_b32 s6, s0, s5
; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s6
; GFX8-NEXT:    s_sub_i32 s0, 0, s6
; GFX8-NEXT:    s_sext_i32_i8 s4, s4
; GFX8-NEXT:    s_ashr_i32 s7, s4, 31
; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
; GFX8-NEXT:    s_add_i32 s4, s4, s7
; GFX8-NEXT:    s_xor_b32 s4, s4, s7
; GFX8-NEXT:    s_xor_b32 s5, s7, s5
; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_lo_u32 v1, s0, v0
; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX8-NEXT:    v_mul_hi_u32 v1, v0, v1
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    v_mul_hi_u32 v2, s4, v0
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    v_mul_lo_u32 v3, v2, s6
; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s4, v3
; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s6, v3
; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s6, v3
; GFX8-NEXT:    v_xor_b32_e32 v2, s5, v2
; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, s5, v2
; GFX8-NEXT:    v_xor_b32_e32 v3, s7, v3
; GFX8-NEXT:    flat_store_byte v[0:1], v2
; GFX8-NEXT:    v_mov_b32_e32 v0, s2
; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, s7, v3
; GFX8-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NEXT:    flat_store_byte v[0:1], v3
; GFX8-NEXT:    s_endpgm
;
; GFX9-LABEL: sdiv_i8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s0, s[8:9], 0x10
; GFX9-NEXT:    v_mov_b32_e32 v2, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_bfe_i32 s1, s0, 0x80008
; GFX9-NEXT:    s_ashr_i32 s4, s1, 31
; GFX9-NEXT:    s_add_i32 s1, s1, s4
; GFX9-NEXT:    s_xor_b32 s5, s1, s4
; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s5
; GFX9-NEXT:    s_sub_i32 s1, 0, s5
; GFX9-NEXT:    s_sext_i32_i8 s0, s0
; GFX9-NEXT:    s_ashr_i32 s6, s0, 31
; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
; GFX9-NEXT:    s_add_i32 s0, s0, s6
; GFX9-NEXT:    s_xor_b32 s7, s0, s6
; GFX9-NEXT:    s_xor_b32 s4, s6, s4
; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_lo_u32 v1, s1, v0
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX9-NEXT:    v_mul_hi_u32 v1, v0, v1
; GFX9-NEXT:    v_add_u32_e32 v0, v0, v1
; GFX9-NEXT:    v_mul_hi_u32 v0, s7, v0
; GFX9-NEXT:    v_mul_lo_u32 v1, v0, s5
; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
; GFX9-NEXT:    v_sub_u32_e32 v1, s7, v1
; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX9-NEXT:    v_subrev_u32_e32 v3, s5, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX9-NEXT:    v_subrev_u32_e32 v3, s5, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX9-NEXT:    v_xor_b32_e32 v0, s4, v0
; GFX9-NEXT:    v_subrev_u32_e32 v0, s4, v0
; GFX9-NEXT:    v_xor_b32_e32 v1, s6, v1
; GFX9-NEXT:    v_subrev_u32_e32 v1, s6, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_store_byte v2, v0, s[0:1]
; GFX9-NEXT:    global_store_byte v2, v1, s[2:3]
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: sdiv_i8:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dword s0, s[8:9], 0x10
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_bfe_i32 s1, s0, 0x80008
; GFX10-NEXT:    s_sext_i32_i8 s0, s0
; GFX10-NEXT:    s_ashr_i32 s4, s1, 31
; GFX10-NEXT:    s_ashr_i32 s6, s0, 31
; GFX10-NEXT:    s_add_i32 s1, s1, s4
; GFX10-NEXT:    s_add_i32 s0, s0, s6
; GFX10-NEXT:    s_xor_b32 s5, s1, s4
; GFX10-NEXT:    s_xor_b32 s0, s0, s6
; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, s5
; GFX10-NEXT:    s_sub_i32 s1, 0, s5
; GFX10-NEXT:    s_xor_b32 s4, s6, s4
; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_lo_u32 v1, s1, v0
; GFX10-NEXT:    v_mul_hi_u32 v1, v0, v1
; GFX10-NEXT:    v_add_nc_u32_e32 v0, v0, v1
; GFX10-NEXT:    v_mul_hi_u32 v0, s0, v0
; GFX10-NEXT:    v_mul_lo_u32 v1, v0, s5
; GFX10-NEXT:    v_add_nc_u32_e32 v2, 1, v0
; GFX10-NEXT:    v_sub_nc_u32_e32 v1, s0, v1
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s5, v1
; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s5, v1
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX10-NEXT:    v_add_nc_u32_e32 v2, 1, v0
; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s5, v1
; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s5, v1
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX10-NEXT:    v_mov_b32_e32 v2, 0
; GFX10-NEXT:    v_xor_b32_e32 v0, s4, v0
; GFX10-NEXT:    v_xor_b32_e32 v1, s6, v1
; GFX10-NEXT:    v_subrev_nc_u32_e32 v0, s4, v0
; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, s6, v1
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_store_byte v2, v0, s[0:1]
; GFX10-NEXT:    global_store_byte v2, v1, s[2:3]
; GFX10-NEXT:    s_endpgm
  %div = sdiv i8 %x, %y
  store i8 %div, ptr addrspace(1) %out0
  %rem = srem i8 %x, %y
  store i8 %rem, ptr addrspace(1) %out1
  ret void
}

; <2 x i8> case: element-wise signed quotient and remainder of the same
; operands; the quotient vector is stored to %out0 and the remainder vector to
; %out1.
; In the expected output all four i8 elements are extracted from one kernarg
; dword (s_sext_i32_i8 plus s_bfe_i32 with 0x80008 / 0x80010 / 0x80018), each
; element pair gets its own 32-bit expansion, and the two results per output
; are repacked into 16 bits before a single short store.
define amdgpu_kernel void @sdivrem_v2i8(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i8> %x, <2 x i8> %y) {
; GFX8-LABEL: sdivrem_v2i8:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_load_dword s2, s[8:9], 0x10
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_bfe_i32 s0, s2, 0x80010
; GFX8-NEXT:    s_ashr_i32 s3, s0, 31
; GFX8-NEXT:    s_add_i32 s0, s0, s3
; GFX8-NEXT:    s_xor_b32 s10, s0, s3
; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s10
; GFX8-NEXT:    s_sub_i32 s4, 0, s10
; GFX8-NEXT:    s_bfe_i32 s1, s2, 0x80018
; GFX8-NEXT:    s_ashr_i32 s12, s1, 31
; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
; GFX8-NEXT:    s_add_i32 s1, s1, s12
; GFX8-NEXT:    s_xor_b32 s13, s1, s12
; GFX8-NEXT:    s_sext_i32_i8 s0, s2
; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f32_u32_e32 v2, s13
; GFX8-NEXT:    s_ashr_i32 s11, s0, 31
; GFX8-NEXT:    s_add_i32 s0, s0, s11
; GFX8-NEXT:    v_mul_lo_u32 v1, s4, v0
; GFX8-NEXT:    s_xor_b32 s0, s0, s11
; GFX8-NEXT:    v_rcp_iflag_f32_e32 v2, v2
; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[8:9], 0x0
; GFX8-NEXT:    v_mul_hi_u32 v1, v0, v1
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    v_mul_hi_u32 v0, s0, v0
; GFX8-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v2
; GFX8-NEXT:    v_cvt_u32_f32_e32 v1, v1
; GFX8-NEXT:    v_mul_lo_u32 v2, v0, s10
; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s0, v2
; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s10, v2
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s10, v2
; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
; GFX8-NEXT:    v_cmp_le_u32_e32
vcc, s10, v2 2371; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2372; GFX8-NEXT: v_subrev_u32_e64 v3, s[0:1], s10, v2 2373; GFX8-NEXT: s_sub_i32 s1, 0, s13 2374; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 2375; GFX8-NEXT: v_mul_lo_u32 v3, s1, v1 2376; GFX8-NEXT: s_bfe_i32 s1, s2, 0x80008 2377; GFX8-NEXT: s_ashr_i32 s2, s1, 31 2378; GFX8-NEXT: s_add_i32 s1, s1, s2 2379; GFX8-NEXT: v_mul_hi_u32 v3, v1, v3 2380; GFX8-NEXT: s_xor_b32 s1, s1, s2 2381; GFX8-NEXT: s_xor_b32 s0, s11, s3 2382; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0 2383; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3 2384; GFX8-NEXT: v_mul_hi_u32 v1, s1, v1 2385; GFX8-NEXT: v_xor_b32_e32 v2, s11, v2 2386; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s0, v0 2387; GFX8-NEXT: v_mul_lo_u32 v3, v1, s13 2388; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s11, v2 2389; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v1 2390; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s1, v3 2391; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s13, v3 2392; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 2393; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s13, v3 2394; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 2395; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v1 2396; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s13, v3 2397; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 2398; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s13, v3 2399; GFX8-NEXT: s_xor_b32 s0, s2, s12 2400; GFX8-NEXT: v_xor_b32_e32 v1, s0, v1 2401; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 2402; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, s0, v1 2403; GFX8-NEXT: v_and_b32_e32 v1, 0xff, v1 2404; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1 2405; GFX8-NEXT: v_xor_b32_e32 v3, s2, v3 2406; GFX8-NEXT: v_or_b32_sdwa v4, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2407; GFX8-NEXT: s_waitcnt lgkmcnt(0) 2408; GFX8-NEXT: v_mov_b32_e32 v0, s4 2409; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, s2, v3 2410; GFX8-NEXT: v_mov_b32_e32 v1, s5 2411; GFX8-NEXT: flat_store_short v[0:1], v4 2412; GFX8-NEXT: v_and_b32_e32 v0, 0xff, v3 2413; GFX8-NEXT: 
v_lshlrev_b16_e32 v0, 8, v0 2414; GFX8-NEXT: v_or_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2415; GFX8-NEXT: v_mov_b32_e32 v0, s6 2416; GFX8-NEXT: v_mov_b32_e32 v1, s7 2417; GFX8-NEXT: flat_store_short v[0:1], v2 2418; GFX8-NEXT: s_endpgm 2419; 2420; GFX9-LABEL: sdivrem_v2i8: 2421; GFX9: ; %bb.0: 2422; GFX9-NEXT: s_load_dword s4, s[8:9], 0x10 2423; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2424; GFX9-NEXT: s_bfe_i32 s0, s4, 0x80010 2425; GFX9-NEXT: s_ashr_i32 s5, s0, 31 2426; GFX9-NEXT: s_add_i32 s0, s0, s5 2427; GFX9-NEXT: s_xor_b32 s6, s0, s5 2428; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s6 2429; GFX9-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 2430; GFX9-NEXT: s_bfe_i32 s8, s4, 0x80018 2431; GFX9-NEXT: s_ashr_i32 s9, s8, 31 2432; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 2433; GFX9-NEXT: s_add_i32 s8, s8, s9 2434; GFX9-NEXT: s_xor_b32 s8, s8, s9 2435; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s8 2436; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2437; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 2438; GFX9-NEXT: s_sub_i32 s10, 0, s6 2439; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 2440; GFX9-NEXT: s_sext_i32_i8 s7, s4 2441; GFX9-NEXT: v_mul_lo_u32 v2, s10, v0 2442; GFX9-NEXT: s_ashr_i32 s10, s7, 31 2443; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 2444; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 2445; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 2446; GFX9-NEXT: s_add_i32 s7, s7, s10 2447; GFX9-NEXT: s_xor_b32 s7, s7, s10 2448; GFX9-NEXT: s_sub_i32 s11, 0, s8 2449; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 2450; GFX9-NEXT: v_mul_hi_u32 v0, s7, v0 2451; GFX9-NEXT: v_mul_lo_u32 v2, s11, v1 2452; GFX9-NEXT: s_bfe_i32 s4, s4, 0x80008 2453; GFX9-NEXT: s_ashr_i32 s11, s4, 31 2454; GFX9-NEXT: v_mul_lo_u32 v3, v0, s6 2455; GFX9-NEXT: v_mul_hi_u32 v2, v1, v2 2456; GFX9-NEXT: s_add_i32 s4, s4, s11 2457; GFX9-NEXT: v_add_u32_e32 v4, 1, v0 2458; GFX9-NEXT: v_sub_u32_e32 v3, s7, v3 2459; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 2460; GFX9-NEXT: s_xor_b32 s4, s4, s11 2461; GFX9-NEXT: v_add_u32_e32 v1, 
v1, v2 2462; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2463; GFX9-NEXT: v_subrev_u32_e32 v4, s6, v3 2464; GFX9-NEXT: v_mul_hi_u32 v1, s4, v1 2465; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 2466; GFX9-NEXT: v_add_u32_e32 v4, 1, v0 2467; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 2468; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2469; GFX9-NEXT: v_subrev_u32_e32 v4, s6, v3 2470; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc 2471; GFX9-NEXT: v_mul_lo_u32 v3, v1, s8 2472; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 2473; GFX9-NEXT: s_xor_b32 s5, s10, s5 2474; GFX9-NEXT: v_xor_b32_e32 v0, s5, v0 2475; GFX9-NEXT: v_sub_u32_e32 v3, s4, v3 2476; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s8, v3 2477; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 2478; GFX9-NEXT: v_subrev_u32_e32 v4, s8, v3 2479; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 2480; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 2481; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s8, v3 2482; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 2483; GFX9-NEXT: s_xor_b32 s4, s11, s9 2484; GFX9-NEXT: v_xor_b32_e32 v1, s4, v1 2485; GFX9-NEXT: v_subrev_u32_e32 v4, s8, v3 2486; GFX9-NEXT: v_subrev_u32_e32 v1, s4, v1 2487; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 2488; GFX9-NEXT: v_and_b32_e32 v1, 0xff, v1 2489; GFX9-NEXT: v_subrev_u32_e32 v0, s5, v0 2490; GFX9-NEXT: v_xor_b32_e32 v3, s11, v3 2491; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 2492; GFX9-NEXT: v_subrev_u32_e32 v3, s11, v3 2493; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2494; GFX9-NEXT: v_mov_b32_e32 v1, 0 2495; GFX9-NEXT: v_xor_b32_e32 v2, s10, v2 2496; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2497; GFX9-NEXT: global_store_short v1, v0, s[0:1] 2498; GFX9-NEXT: v_and_b32_e32 v0, 0xff, v3 2499; GFX9-NEXT: v_subrev_u32_e32 v2, s10, v2 2500; GFX9-NEXT: v_lshlrev_b16_e32 v0, 8, v0 2501; GFX9-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2502; GFX9-NEXT: global_store_short v1, v0, s[2:3] 2503; GFX9-NEXT: 
s_endpgm 2504; 2505; GFX10-LABEL: sdivrem_v2i8: 2506; GFX10: ; %bb.0: 2507; GFX10-NEXT: s_load_dword s0, s[8:9], 0x10 2508; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2509; GFX10-NEXT: s_bfe_i32 s1, s0, 0x80018 2510; GFX10-NEXT: s_bfe_i32 s3, s0, 0x80010 2511; GFX10-NEXT: s_ashr_i32 s2, s1, 31 2512; GFX10-NEXT: s_ashr_i32 s10, s3, 31 2513; GFX10-NEXT: s_add_i32 s1, s1, s2 2514; GFX10-NEXT: s_add_i32 s3, s3, s10 2515; GFX10-NEXT: s_xor_b32 s1, s1, s2 2516; GFX10-NEXT: s_xor_b32 s3, s3, s10 2517; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s1 2518; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s3 2519; GFX10-NEXT: s_sub_i32 s4, 0, s1 2520; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 2521; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 2522; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2523; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 2524; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 2525; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 2526; GFX10-NEXT: v_mul_lo_u32 v2, s4, v0 2527; GFX10-NEXT: s_sub_i32 s4, 0, s3 2528; GFX10-NEXT: v_mul_lo_u32 v3, s4, v1 2529; GFX10-NEXT: s_bfe_i32 s4, s0, 0x80008 2530; GFX10-NEXT: s_sext_i32_i8 s0, s0 2531; GFX10-NEXT: s_ashr_i32 s11, s4, 31 2532; GFX10-NEXT: s_ashr_i32 s12, s0, 31 2533; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2 2534; GFX10-NEXT: s_add_i32 s4, s4, s11 2535; GFX10-NEXT: s_add_i32 s0, s0, s12 2536; GFX10-NEXT: v_mul_hi_u32 v3, v1, v3 2537; GFX10-NEXT: s_xor_b32 s4, s4, s11 2538; GFX10-NEXT: s_xor_b32 s0, s0, s12 2539; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2 2540; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3 2541; GFX10-NEXT: v_mul_hi_u32 v0, s4, v0 2542; GFX10-NEXT: v_mul_hi_u32 v1, s0, v1 2543; GFX10-NEXT: v_mul_lo_u32 v2, v0, s1 2544; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v0 2545; GFX10-NEXT: v_mul_lo_u32 v3, v1, s3 2546; GFX10-NEXT: v_add_nc_u32_e32 v6, 1, v1 2547; GFX10-NEXT: v_sub_nc_u32_e32 v2, s4, v2 2548; GFX10-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0 2549; GFX10-NEXT: v_sub_nc_u32_e32 v3, s0, v3 2550; GFX10-NEXT: v_subrev_nc_u32_e32 v5, s1, v2 2551; GFX10-NEXT: v_cmp_le_u32_e32 
vcc_lo, s1, v2 2552; GFX10-NEXT: v_cmp_le_u32_e64 s0, s3, v3 2553; GFX10-NEXT: v_subrev_nc_u32_e32 v7, s3, v3 2554; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 2555; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo 2556; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v6, s0 2557; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v7, s0 2558; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v0 2559; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v2 2560; GFX10-NEXT: v_subrev_nc_u32_e32 v5, s1, v2 2561; GFX10-NEXT: v_add_nc_u32_e32 v6, 1, v1 2562; GFX10-NEXT: v_cmp_le_u32_e64 s0, s3, v3 2563; GFX10-NEXT: v_subrev_nc_u32_e32 v7, s3, v3 2564; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 2565; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo 2566; GFX10-NEXT: s_xor_b32 s1, s11, s2 2567; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v6, s0 2568; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v7, s0 2569; GFX10-NEXT: v_xor_b32_e32 v0, s1, v0 2570; GFX10-NEXT: v_xor_b32_e32 v2, s11, v2 2571; GFX10-NEXT: s_xor_b32 s0, s12, s10 2572; GFX10-NEXT: v_mov_b32_e32 v4, 0xff 2573; GFX10-NEXT: v_xor_b32_e32 v1, s0, v1 2574; GFX10-NEXT: v_subrev_nc_u32_e32 v0, s1, v0 2575; GFX10-NEXT: v_xor_b32_e32 v3, s12, v3 2576; GFX10-NEXT: v_subrev_nc_u32_e32 v2, s11, v2 2577; GFX10-NEXT: v_subrev_nc_u32_e32 v1, s0, v1 2578; GFX10-NEXT: v_and_b32_sdwa v0, v0, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 2579; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s12, v3 2580; GFX10-NEXT: v_and_b32_sdwa v2, v2, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 2581; GFX10-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2582; GFX10-NEXT: v_mov_b32_e32 v1, 0 2583; GFX10-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2584; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2585; GFX10-NEXT: global_store_short v1, v0, s[4:5] 2586; GFX10-NEXT: global_store_short v1, v2, s[6:7] 2587; GFX10-NEXT: s_endpgm 2588 %div = sdiv <2 x i8> %x, %y 2589 
store <2 x i8> %div, ptr addrspace(1) %out0
  %rem = srem <2 x i8> %x, %y
  store <2 x i8> %rem, ptr addrspace(1) %out1
  ret void
}

; Scalar i16 case. Despite the "sdiv_" name, the body computes both the signed
; quotient and the signed remainder of the same %x / %y pair: the quotient is
; stored to %out0 and the remainder to %out1.
; In the expected output for every prefix both operands come from one kernarg
; dword (offset 0x10) and are sign-extended to 32 bits (s_sext_i32_i16 for the
; low half, s_bfe_i32 ... 0x100010 for the high half); a single
; reciprocal-based 32-bit expansion (one v_rcp_iflag_f32) then feeds both
; stored results.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define amdgpu_kernel void @sdiv_i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i16 %x, i16 %y) {
; GFX8-LABEL: sdiv_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_load_dword s4, s[8:9], 0x10
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_bfe_i32 s0, s4, 0x100010
; GFX8-NEXT:    s_ashr_i32 s5, s0, 31
; GFX8-NEXT:    s_add_i32 s0, s0, s5
; GFX8-NEXT:    s_xor_b32 s6, s0, s5
; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s6
; GFX8-NEXT:    s_sub_i32 s0, 0, s6
; GFX8-NEXT:    s_sext_i32_i16 s4, s4
; GFX8-NEXT:    s_ashr_i32 s7, s4, 31
; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
; GFX8-NEXT:    s_add_i32 s4, s4, s7
; GFX8-NEXT:    s_xor_b32 s4, s4, s7
; GFX8-NEXT:    s_xor_b32 s5, s7, s5
; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_lo_u32 v1, s0, v0
; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX8-NEXT:    v_mul_hi_u32 v1, v0, v1
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    v_mul_hi_u32 v2, s4, v0
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    v_mul_lo_u32 v3, v2, s6
; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s4, v3
; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s6, v3
; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v2
; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s6, v3
; GFX8-NEXT:    v_xor_b32_e32 v2, s5, v2
; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, s5, v2
; GFX8-NEXT:    v_xor_b32_e32 v3, s7, v3
; GFX8-NEXT:    flat_store_short v[0:1], v2
; GFX8-NEXT:    v_mov_b32_e32 v0, s2
; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, s7, v3
; GFX8-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NEXT:    flat_store_short v[0:1], v3
; GFX8-NEXT:    s_endpgm
;
; GFX9-LABEL: sdiv_i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s0, s[8:9], 0x10
; GFX9-NEXT:    v_mov_b32_e32 v2, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_bfe_i32 s1, s0, 0x100010
; GFX9-NEXT:    s_ashr_i32 s4, s1, 31
; GFX9-NEXT:    s_add_i32 s1, s1, s4
; GFX9-NEXT:    s_xor_b32 s5, s1, s4
; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s5
; GFX9-NEXT:    s_sub_i32 s1, 0, s5
; GFX9-NEXT:    s_sext_i32_i16 s0, s0
; GFX9-NEXT:    s_ashr_i32 s6, s0, 31
; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
; GFX9-NEXT:    s_add_i32 s0, s0, s6
; GFX9-NEXT:    s_xor_b32 s7, s0, s6
; GFX9-NEXT:    s_xor_b32 s4, s6, s4
; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_lo_u32 v1, s1, v0
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX9-NEXT:    v_mul_hi_u32 v1, v0, v1
; GFX9-NEXT:    v_add_u32_e32 v0, v0, v1
; GFX9-NEXT:    v_mul_hi_u32 v0, s7, v0
; GFX9-NEXT:    v_mul_lo_u32 v1, v0, s5
; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
; GFX9-NEXT:    v_sub_u32_e32 v1, s7, v1
; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX9-NEXT:    v_subrev_u32_e32 v3, s5, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX9-NEXT:    v_subrev_u32_e32 v3, s5, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX9-NEXT:    v_xor_b32_e32 v0, s4, v0
; GFX9-NEXT:    v_subrev_u32_e32 v0, s4, v0
; GFX9-NEXT:    v_xor_b32_e32 v1, s6, v1
; GFX9-NEXT:    v_subrev_u32_e32 v1, s6, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_store_short v2, v0, s[0:1]
; GFX9-NEXT:    global_store_short v2, v1, s[2:3]
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: sdiv_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dword s0, s[8:9], 0x10
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_bfe_i32 s1, s0, 0x100010
; GFX10-NEXT:    s_sext_i32_i16 s0, s0
; GFX10-NEXT:    s_ashr_i32 s4, s1, 31
; GFX10-NEXT:    s_ashr_i32 s6, s0, 31
; GFX10-NEXT:    s_add_i32 s1, s1, s4
; GFX10-NEXT:    s_add_i32 s0, s0, s6
; GFX10-NEXT:    s_xor_b32 s5, s1, s4
; GFX10-NEXT:    s_xor_b32 s0, s0, s6
; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, s5
; GFX10-NEXT:    s_sub_i32 s1, 0, s5
; GFX10-NEXT:    s_xor_b32 s4, s6, s4
; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_lo_u32 v1, s1, v0
; GFX10-NEXT:    v_mul_hi_u32 v1, v0, v1
; GFX10-NEXT:    v_add_nc_u32_e32 v0, v0, v1
; GFX10-NEXT:    v_mul_hi_u32 v0, s0, v0
; GFX10-NEXT:    v_mul_lo_u32 v1, v0, s5
; GFX10-NEXT:    v_add_nc_u32_e32 v2, 1, v0
; GFX10-NEXT:    v_sub_nc_u32_e32 v1, s0, v1
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s5, v1
; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s5, v1
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX10-NEXT:    v_add_nc_u32_e32 v2, 1, v0
; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s5, v1
; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s5, v1
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX10-NEXT:    v_mov_b32_e32 v2, 0
; GFX10-NEXT:    v_xor_b32_e32 v0, s4, v0
; GFX10-NEXT:    v_xor_b32_e32 v1, s6, v1
; GFX10-NEXT:    v_subrev_nc_u32_e32 v0, s4, v0
; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, s6, v1
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_store_short v2, v0, s[0:1]
; GFX10-NEXT:    global_store_short v2, v1, s[2:3]
; GFX10-NEXT:    s_endpgm
  %div = sdiv i16 %x, %y
  store i16 %div, ptr addrspace(1) %out0
  %rem = srem i16 %x, %y
  store i16 %rem, ptr addrspace(1) %out1
  ret void
}

; <2 x i16> case: element-wise signed quotient and remainder of the same
; operands; the quotient vector is stored to %out0 and the remainder vector to
; %out1.
; In the expected output the two operand vectors arrive as two kernarg dwords
; (s_load_dwordx2 at offset 0x10), each element is sign-extended to 32 bits
; (s_sext_i32_i16 / s_bfe_i32 ... 0x100010), each element pair gets its own
; 32-bit expansion, and the two results per output are repacked into one dword
; before a single dword store.
define amdgpu_kernel void @sdivrem_v2i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i16> %x, <2 x i16> %y) {
; GFX8-LABEL: sdivrem_v2i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_load_dwordx2 s[2:3], s[8:9], 0x10
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_sext_i32_i16 s0, s3
; GFX8-NEXT:    s_ashr_i32 s10, s0, 31
; GFX8-NEXT:    s_add_i32 s0, s0, s10
; GFX8-NEXT:    s_xor_b32 s11, s0, s10
; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s11
; GFX8-NEXT:    s_sub_i32 s4, 0, s11
; GFX8-NEXT:    s_bfe_i32 s1, s3, 0x100010
; GFX8-NEXT:    s_ashr_i32 s12, s1, 31
; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
; GFX8-NEXT:    s_add_i32 s1, s1, s12
; GFX8-NEXT:    s_xor_b32 s13, s1, s12
; GFX8-NEXT:    s_sext_i32_i16 s0, s2
; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f32_u32_e32 v2, s13
; GFX8-NEXT:    s_ashr_i32 s3, s0, 31
; GFX8-NEXT:    s_add_i32 s0, s0, s3
; GFX8-NEXT:    v_mul_lo_u32 v1, s4, v0
; GFX8-NEXT:    s_xor_b32 s0, s0, s3
; GFX8-NEXT:    v_rcp_iflag_f32_e32 v2, v2
; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[8:9], 0x0
; GFX8-NEXT:    v_mul_hi_u32 v1, v0, v1
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    v_mul_hi_u32 v0, s0, v0
; GFX8-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v2
; GFX8-NEXT:    v_cvt_u32_f32_e32 v1, v1
; GFX8-NEXT:    v_mul_lo_u32 v2, v0, s11
; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s0, v2
; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s11, v2
; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s11, v2
; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
; GFX8-NEXT:
v_cmp_le_u32_e32 vcc, s11, v2 2780; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2781; GFX8-NEXT: v_subrev_u32_e64 v3, s[0:1], s11, v2 2782; GFX8-NEXT: s_sub_i32 s1, 0, s13 2783; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 2784; GFX8-NEXT: v_mul_lo_u32 v3, s1, v1 2785; GFX8-NEXT: s_bfe_i32 s1, s2, 0x100010 2786; GFX8-NEXT: s_ashr_i32 s2, s1, 31 2787; GFX8-NEXT: s_add_i32 s1, s1, s2 2788; GFX8-NEXT: v_mul_hi_u32 v3, v1, v3 2789; GFX8-NEXT: s_xor_b32 s1, s1, s2 2790; GFX8-NEXT: s_xor_b32 s0, s3, s10 2791; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0 2792; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3 2793; GFX8-NEXT: v_mul_hi_u32 v1, s1, v1 2794; GFX8-NEXT: v_xor_b32_e32 v2, s3, v2 2795; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s0, v0 2796; GFX8-NEXT: v_mul_lo_u32 v3, v1, s13 2797; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s3, v2 2798; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v1 2799; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s1, v3 2800; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s13, v3 2801; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 2802; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s13, v3 2803; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 2804; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v1 2805; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s13, v3 2806; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 2807; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s13, v3 2808; GFX8-NEXT: s_xor_b32 s0, s2, s12 2809; GFX8-NEXT: v_xor_b32_e32 v1, s0, v1 2810; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 2811; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, s0, v1 2812; GFX8-NEXT: v_xor_b32_e32 v3, s2, v3 2813; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1 2814; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, s2, v3 2815; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1 2816; GFX8-NEXT: v_or_b32_sdwa v4, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 2817; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v3 2818; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 2819; GFX8-NEXT: v_or_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 2820; 
GFX8-NEXT: s_waitcnt lgkmcnt(0) 2821; GFX8-NEXT: v_mov_b32_e32 v0, s4 2822; GFX8-NEXT: v_mov_b32_e32 v1, s5 2823; GFX8-NEXT: flat_store_dword v[0:1], v4 2824; GFX8-NEXT: v_mov_b32_e32 v0, s6 2825; GFX8-NEXT: v_mov_b32_e32 v1, s7 2826; GFX8-NEXT: flat_store_dword v[0:1], v2 2827; GFX8-NEXT: s_endpgm 2828; 2829; GFX9-LABEL: sdivrem_v2i16: 2830; GFX9: ; %bb.0: 2831; GFX9-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10 2832; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2833; GFX9-NEXT: s_sext_i32_i16 s0, s5 2834; GFX9-NEXT: s_ashr_i32 s6, s0, 31 2835; GFX9-NEXT: s_add_i32 s0, s0, s6 2836; GFX9-NEXT: s_xor_b32 s7, s0, s6 2837; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s7 2838; GFX9-NEXT: s_bfe_i32 s5, s5, 0x100010 2839; GFX9-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 2840; GFX9-NEXT: s_ashr_i32 s9, s5, 31 2841; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 2842; GFX9-NEXT: s_add_i32 s5, s5, s9 2843; GFX9-NEXT: s_xor_b32 s5, s5, s9 2844; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s5 2845; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2846; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 2847; GFX9-NEXT: s_sub_i32 s10, 0, s7 2848; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 2849; GFX9-NEXT: s_sext_i32_i16 s8, s4 2850; GFX9-NEXT: v_mul_lo_u32 v2, s10, v0 2851; GFX9-NEXT: s_ashr_i32 s10, s8, 31 2852; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 2853; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 2854; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 2855; GFX9-NEXT: s_add_i32 s8, s8, s10 2856; GFX9-NEXT: s_xor_b32 s8, s8, s10 2857; GFX9-NEXT: s_sub_i32 s11, 0, s5 2858; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 2859; GFX9-NEXT: v_mul_hi_u32 v0, s8, v0 2860; GFX9-NEXT: v_mul_lo_u32 v2, s11, v1 2861; GFX9-NEXT: s_bfe_i32 s4, s4, 0x100010 2862; GFX9-NEXT: s_ashr_i32 s11, s4, 31 2863; GFX9-NEXT: v_mul_lo_u32 v3, v0, s7 2864; GFX9-NEXT: v_mul_hi_u32 v2, v1, v2 2865; GFX9-NEXT: s_add_i32 s4, s4, s11 2866; GFX9-NEXT: v_add_u32_e32 v4, 1, v0 2867; GFX9-NEXT: v_sub_u32_e32 v3, s8, v3 2868; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v3 2869; GFX9-NEXT: s_xor_b32 s4, s4, s11 2870; 
GFX9-NEXT: v_add_u32_e32 v1, v1, v2 2871; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2872; GFX9-NEXT: v_subrev_u32_e32 v4, s7, v3 2873; GFX9-NEXT: v_mul_hi_u32 v1, s4, v1 2874; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 2875; GFX9-NEXT: v_add_u32_e32 v4, 1, v0 2876; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v3 2877; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2878; GFX9-NEXT: v_subrev_u32_e32 v4, s7, v3 2879; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc 2880; GFX9-NEXT: v_mul_lo_u32 v3, v1, s5 2881; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 2882; GFX9-NEXT: s_xor_b32 s6, s10, s6 2883; GFX9-NEXT: v_xor_b32_e32 v0, s6, v0 2884; GFX9-NEXT: v_sub_u32_e32 v3, s4, v3 2885; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s5, v3 2886; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 2887; GFX9-NEXT: v_subrev_u32_e32 v4, s5, v3 2888; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 2889; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 2890; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s5, v3 2891; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 2892; GFX9-NEXT: v_subrev_u32_e32 v4, s5, v3 2893; GFX9-NEXT: s_xor_b32 s4, s11, s9 2894; GFX9-NEXT: v_subrev_u32_e32 v0, s6, v0 2895; GFX9-NEXT: v_xor_b32_e32 v2, s10, v2 2896; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 2897; GFX9-NEXT: v_xor_b32_e32 v1, s4, v1 2898; GFX9-NEXT: v_subrev_u32_e32 v2, s10, v2 2899; GFX9-NEXT: v_subrev_u32_e32 v1, s4, v1 2900; GFX9-NEXT: v_xor_b32_e32 v3, s11, v3 2901; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 2902; GFX9-NEXT: v_subrev_u32_e32 v3, s11, v3 2903; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0 2904; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v2 2905; GFX9-NEXT: v_mov_b32_e32 v2, 0 2906; GFX9-NEXT: v_lshl_or_b32 v1, v3, 16, v1 2907; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2908; GFX9-NEXT: global_store_dword v2, v0, s[0:1] 2909; GFX9-NEXT: global_store_dword v2, v1, s[2:3] 2910; GFX9-NEXT: s_endpgm 2911; 2912; GFX10-LABEL: sdivrem_v2i16: 2913; GFX10: ; %bb.0: 2914; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x10 2915; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2916; GFX10-NEXT: 
s_sext_i32_i16 s2, s1 2917; GFX10-NEXT: s_bfe_i32 s1, s1, 0x100010 2918; GFX10-NEXT: s_ashr_i32 s3, s2, 31 2919; GFX10-NEXT: s_ashr_i32 s10, s1, 31 2920; GFX10-NEXT: s_add_i32 s2, s2, s3 2921; GFX10-NEXT: s_add_i32 s1, s1, s10 2922; GFX10-NEXT: s_xor_b32 s2, s2, s3 2923; GFX10-NEXT: s_xor_b32 s1, s1, s10 2924; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s2 2925; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s1 2926; GFX10-NEXT: s_sub_i32 s4, 0, s2 2927; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 2928; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 2929; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2930; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 2931; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 2932; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 2933; GFX10-NEXT: v_mul_lo_u32 v2, s4, v0 2934; GFX10-NEXT: s_sub_i32 s4, 0, s1 2935; GFX10-NEXT: v_mul_lo_u32 v3, s4, v1 2936; GFX10-NEXT: s_sext_i32_i16 s4, s0 2937; GFX10-NEXT: s_bfe_i32 s0, s0, 0x100010 2938; GFX10-NEXT: s_ashr_i32 s11, s4, 31 2939; GFX10-NEXT: s_ashr_i32 s12, s0, 31 2940; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2 2941; GFX10-NEXT: s_add_i32 s4, s4, s11 2942; GFX10-NEXT: s_add_i32 s0, s0, s12 2943; GFX10-NEXT: v_mul_hi_u32 v3, v1, v3 2944; GFX10-NEXT: s_xor_b32 s4, s4, s11 2945; GFX10-NEXT: s_xor_b32 s0, s0, s12 2946; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2 2947; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3 2948; GFX10-NEXT: v_mul_hi_u32 v0, s4, v0 2949; GFX10-NEXT: v_mul_hi_u32 v1, s0, v1 2950; GFX10-NEXT: v_mul_lo_u32 v2, v0, s2 2951; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v0 2952; GFX10-NEXT: v_mul_lo_u32 v3, v1, s1 2953; GFX10-NEXT: v_add_nc_u32_e32 v6, 1, v1 2954; GFX10-NEXT: v_sub_nc_u32_e32 v2, s4, v2 2955; GFX10-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0 2956; GFX10-NEXT: v_sub_nc_u32_e32 v3, s0, v3 2957; GFX10-NEXT: v_subrev_nc_u32_e32 v5, s2, v2 2958; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s2, v2 2959; GFX10-NEXT: v_cmp_le_u32_e64 s0, s1, v3 2960; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 2961; GFX10-NEXT: v_subrev_nc_u32_e32 v4, s1, v3 2962; 
GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo 2963; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v6, s0 2964; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v0 2965; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v4, s0 2966; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s2, v2 2967; GFX10-NEXT: v_subrev_nc_u32_e32 v4, s2, v2 2968; GFX10-NEXT: v_add_nc_u32_e32 v6, 1, v1 2969; GFX10-NEXT: v_cmp_le_u32_e64 s0, s1, v3 2970; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 2971; GFX10-NEXT: v_subrev_nc_u32_e32 v5, s1, v3 2972; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc_lo 2973; GFX10-NEXT: s_xor_b32 s1, s11, s3 2974; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v6, s0 2975; GFX10-NEXT: v_xor_b32_e32 v0, s1, v0 2976; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v5, s0 2977; GFX10-NEXT: v_xor_b32_e32 v2, s11, v2 2978; GFX10-NEXT: s_xor_b32 s0, s12, s10 2979; GFX10-NEXT: v_xor_b32_e32 v1, s0, v1 2980; GFX10-NEXT: v_subrev_nc_u32_e32 v0, s1, v0 2981; GFX10-NEXT: v_xor_b32_e32 v3, s12, v3 2982; GFX10-NEXT: v_subrev_nc_u32_e32 v2, s11, v2 2983; GFX10-NEXT: v_subrev_nc_u32_e32 v1, s0, v1 2984; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 2985; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s12, v3 2986; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2 2987; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 2988; GFX10-NEXT: v_mov_b32_e32 v1, 0 2989; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 2990; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2991; GFX10-NEXT: global_store_dword v1, v0, s[4:5] 2992; GFX10-NEXT: global_store_dword v1, v2, s[6:7] 2993; GFX10-NEXT: s_endpgm 2994 %div = sdiv <2 x i16> %x, %y 2995 store <2 x i16> %div, ptr addrspace(1) %out0 2996 %rem = srem <2 x i16> %x, %y 2997 store <2 x i16> %rem, ptr addrspace(1) %out1 2998 ret void 2999} 3000 3001define amdgpu_kernel void @sdivrem_i3(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i3 %x, i3 %y) { 3002; GFX8-LABEL: sdivrem_i3: 3003; GFX8: ; %bb.0: 3004; GFX8-NEXT: s_load_dword s4, s[8:9], 0x10 3005; GFX8-NEXT: s_waitcnt lgkmcnt(0) 3006; GFX8-NEXT: s_bfe_i32 s0, s4, 0x30008 3007; GFX8-NEXT: 
s_ashr_i32 s5, s0, 31 3008; GFX8-NEXT: s_add_i32 s0, s0, s5 3009; GFX8-NEXT: s_xor_b32 s6, s0, s5 3010; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s6 3011; GFX8-NEXT: s_sub_i32 s0, 0, s6 3012; GFX8-NEXT: s_bfe_i32 s4, s4, 0x30000 3013; GFX8-NEXT: s_ashr_i32 s7, s4, 31 3014; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 3015; GFX8-NEXT: s_add_i32 s4, s4, s7 3016; GFX8-NEXT: s_xor_b32 s4, s4, s7 3017; GFX8-NEXT: s_xor_b32 s5, s7, s5 3018; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 3019; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 3020; GFX8-NEXT: v_mul_lo_u32 v1, s0, v0 3021; GFX8-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 3022; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 3023; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 3024; GFX8-NEXT: v_mul_hi_u32 v2, s4, v0 3025; GFX8-NEXT: s_waitcnt lgkmcnt(0) 3026; GFX8-NEXT: v_mov_b32_e32 v0, s0 3027; GFX8-NEXT: v_mov_b32_e32 v1, s1 3028; GFX8-NEXT: v_mul_lo_u32 v3, v2, s6 3029; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v2 3030; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s4, v3 3031; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 3032; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 3033; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s6, v3 3034; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 3035; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v2 3036; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 3037; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 3038; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s6, v3 3039; GFX8-NEXT: v_xor_b32_e32 v2, s5, v2 3040; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 3041; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s5, v2 3042; GFX8-NEXT: v_xor_b32_e32 v3, s7, v3 3043; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 3044; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, s7, v3 3045; GFX8-NEXT: flat_store_byte v[0:1], v2 3046; GFX8-NEXT: v_mov_b32_e32 v0, s2 3047; GFX8-NEXT: v_and_b32_e32 v2, 7, v3 3048; GFX8-NEXT: v_mov_b32_e32 v1, s3 3049; GFX8-NEXT: flat_store_byte v[0:1], v2 3050; GFX8-NEXT: s_endpgm 3051; 3052; GFX9-LABEL: sdivrem_i3: 3053; GFX9: ; %bb.0: 3054; GFX9-NEXT: s_load_dword s0, s[8:9], 0x10 3055; 
GFX9-NEXT: v_mov_b32_e32 v2, 0 3056; GFX9-NEXT: s_waitcnt lgkmcnt(0) 3057; GFX9-NEXT: s_bfe_i32 s1, s0, 0x30008 3058; GFX9-NEXT: s_ashr_i32 s4, s1, 31 3059; GFX9-NEXT: s_add_i32 s1, s1, s4 3060; GFX9-NEXT: s_xor_b32 s5, s1, s4 3061; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s5 3062; GFX9-NEXT: s_sub_i32 s1, 0, s5 3063; GFX9-NEXT: s_bfe_i32 s0, s0, 0x30000 3064; GFX9-NEXT: s_ashr_i32 s6, s0, 31 3065; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 3066; GFX9-NEXT: s_add_i32 s0, s0, s6 3067; GFX9-NEXT: s_xor_b32 s7, s0, s6 3068; GFX9-NEXT: s_xor_b32 s4, s6, s4 3069; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 3070; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 3071; GFX9-NEXT: v_mul_lo_u32 v1, s1, v0 3072; GFX9-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 3073; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 3074; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 3075; GFX9-NEXT: v_mul_hi_u32 v0, s7, v0 3076; GFX9-NEXT: v_mul_lo_u32 v1, v0, s5 3077; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 3078; GFX9-NEXT: v_sub_u32_e32 v1, s7, v1 3079; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s5, v1 3080; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 3081; GFX9-NEXT: v_subrev_u32_e32 v3, s5, v1 3082; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 3083; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 3084; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s5, v1 3085; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 3086; GFX9-NEXT: v_subrev_u32_e32 v3, s5, v1 3087; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 3088; GFX9-NEXT: v_xor_b32_e32 v0, s4, v0 3089; GFX9-NEXT: v_subrev_u32_e32 v0, s4, v0 3090; GFX9-NEXT: v_xor_b32_e32 v1, s6, v1 3091; GFX9-NEXT: v_subrev_u32_e32 v1, s6, v1 3092; GFX9-NEXT: v_and_b32_e32 v0, 7, v0 3093; GFX9-NEXT: s_waitcnt lgkmcnt(0) 3094; GFX9-NEXT: global_store_byte v2, v0, s[0:1] 3095; GFX9-NEXT: v_and_b32_e32 v0, 7, v1 3096; GFX9-NEXT: global_store_byte v2, v0, s[2:3] 3097; GFX9-NEXT: s_endpgm 3098; 3099; GFX10-LABEL: sdivrem_i3: 3100; GFX10: ; %bb.0: 3101; GFX10-NEXT: s_load_dword s0, s[8:9], 0x10 3102; GFX10-NEXT: s_waitcnt lgkmcnt(0) 3103; GFX10-NEXT: s_bfe_i32 
s1, s0, 0x30008 3104; GFX10-NEXT: s_bfe_i32 s0, s0, 0x30000 3105; GFX10-NEXT: s_ashr_i32 s4, s1, 31 3106; GFX10-NEXT: s_ashr_i32 s5, s0, 31 3107; GFX10-NEXT: s_add_i32 s1, s1, s4 3108; GFX10-NEXT: s_add_i32 s0, s0, s5 3109; GFX10-NEXT: s_xor_b32 s1, s1, s4 3110; GFX10-NEXT: s_xor_b32 s0, s0, s5 3111; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s1 3112; GFX10-NEXT: s_sub_i32 s2, 0, s1 3113; GFX10-NEXT: s_xor_b32 s4, s5, s4 3114; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 3115; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 3116; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 3117; GFX10-NEXT: v_mul_lo_u32 v1, s2, v0 3118; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 3119; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 3120; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0 3121; GFX10-NEXT: v_mul_lo_u32 v1, v0, s1 3122; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 3123; GFX10-NEXT: v_sub_nc_u32_e32 v1, s0, v1 3124; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s1, v1 3125; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v1 3126; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 3127; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 3128; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 3129; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v1 3130; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s1, v1 3131; GFX10-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 3132; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 3133; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 3134; GFX10-NEXT: v_mov_b32_e32 v2, 0 3135; GFX10-NEXT: v_xor_b32_e32 v0, s4, v0 3136; GFX10-NEXT: v_xor_b32_e32 v1, s5, v1 3137; GFX10-NEXT: v_subrev_nc_u32_e32 v0, s4, v0 3138; GFX10-NEXT: v_subrev_nc_u32_e32 v1, s5, v1 3139; GFX10-NEXT: v_and_b32_e32 v0, 7, v0 3140; GFX10-NEXT: v_and_b32_e32 v1, 7, v1 3141; GFX10-NEXT: s_waitcnt lgkmcnt(0) 3142; GFX10-NEXT: global_store_byte v2, v0, s[0:1] 3143; GFX10-NEXT: global_store_byte v2, v1, s[2:3] 3144; GFX10-NEXT: s_endpgm 3145 %div = sdiv i3 %x, %y 3146 store i3 %div, ptr addrspace(1) %out0 3147 %rem = srem i3 %x, %y 3148 store i3 %rem, ptr addrspace(1) %out1 3149 ret 
void 3150} 3151 3152define amdgpu_kernel void @sdivrem_i27(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i27 %x, i27 %y) { 3153; GFX8-LABEL: sdivrem_i27: 3154; GFX8: ; %bb.0: 3155; GFX8-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10 3156; GFX8-NEXT: s_waitcnt lgkmcnt(0) 3157; GFX8-NEXT: s_bfe_i32 s0, s5, 0x1b0000 3158; GFX8-NEXT: s_ashr_i32 s5, s0, 31 3159; GFX8-NEXT: s_add_i32 s0, s0, s5 3160; GFX8-NEXT: s_xor_b32 s6, s0, s5 3161; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s6 3162; GFX8-NEXT: s_sub_i32 s0, 0, s6 3163; GFX8-NEXT: s_bfe_i32 s4, s4, 0x1b0000 3164; GFX8-NEXT: s_ashr_i32 s7, s4, 31 3165; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 3166; GFX8-NEXT: s_add_i32 s4, s4, s7 3167; GFX8-NEXT: s_xor_b32 s4, s4, s7 3168; GFX8-NEXT: s_xor_b32 s5, s7, s5 3169; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 3170; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 3171; GFX8-NEXT: v_mul_lo_u32 v1, s0, v0 3172; GFX8-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 3173; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 3174; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 3175; GFX8-NEXT: v_mul_hi_u32 v2, s4, v0 3176; GFX8-NEXT: s_waitcnt lgkmcnt(0) 3177; GFX8-NEXT: v_mov_b32_e32 v0, s0 3178; GFX8-NEXT: v_mov_b32_e32 v1, s1 3179; GFX8-NEXT: v_mul_lo_u32 v3, v2, s6 3180; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v2 3181; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s4, v3 3182; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 3183; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 3184; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s6, v3 3185; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 3186; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v2 3187; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 3188; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 3189; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s6, v3 3190; GFX8-NEXT: v_xor_b32_e32 v2, s5, v2 3191; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 3192; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s5, v2 3193; GFX8-NEXT: v_xor_b32_e32 v3, s7, v3 3194; GFX8-NEXT: v_and_b32_e32 v2, 0x7ffffff, v2 3195; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, s7, v3 3196; 
GFX8-NEXT: flat_store_dword v[0:1], v2 3197; GFX8-NEXT: v_mov_b32_e32 v0, s2 3198; GFX8-NEXT: v_and_b32_e32 v2, 0x7ffffff, v3 3199; GFX8-NEXT: v_mov_b32_e32 v1, s3 3200; GFX8-NEXT: flat_store_dword v[0:1], v2 3201; GFX8-NEXT: s_endpgm 3202; 3203; GFX9-LABEL: sdivrem_i27: 3204; GFX9: ; %bb.0: 3205; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x10 3206; GFX9-NEXT: v_mov_b32_e32 v2, 0 3207; GFX9-NEXT: s_waitcnt lgkmcnt(0) 3208; GFX9-NEXT: s_bfe_i32 s1, s1, 0x1b0000 3209; GFX9-NEXT: s_ashr_i32 s4, s1, 31 3210; GFX9-NEXT: s_add_i32 s1, s1, s4 3211; GFX9-NEXT: s_xor_b32 s5, s1, s4 3212; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s5 3213; GFX9-NEXT: s_sub_i32 s1, 0, s5 3214; GFX9-NEXT: s_bfe_i32 s0, s0, 0x1b0000 3215; GFX9-NEXT: s_ashr_i32 s6, s0, 31 3216; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 3217; GFX9-NEXT: s_add_i32 s0, s0, s6 3218; GFX9-NEXT: s_xor_b32 s7, s0, s6 3219; GFX9-NEXT: s_xor_b32 s4, s6, s4 3220; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 3221; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 3222; GFX9-NEXT: v_mul_lo_u32 v1, s1, v0 3223; GFX9-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 3224; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 3225; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 3226; GFX9-NEXT: v_mul_hi_u32 v0, s7, v0 3227; GFX9-NEXT: v_mul_lo_u32 v1, v0, s5 3228; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 3229; GFX9-NEXT: v_sub_u32_e32 v1, s7, v1 3230; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s5, v1 3231; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 3232; GFX9-NEXT: v_subrev_u32_e32 v3, s5, v1 3233; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 3234; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 3235; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s5, v1 3236; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 3237; GFX9-NEXT: v_subrev_u32_e32 v3, s5, v1 3238; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 3239; GFX9-NEXT: v_xor_b32_e32 v0, s4, v0 3240; GFX9-NEXT: v_subrev_u32_e32 v0, s4, v0 3241; GFX9-NEXT: v_xor_b32_e32 v1, s6, v1 3242; GFX9-NEXT: v_subrev_u32_e32 v1, s6, v1 3243; GFX9-NEXT: v_and_b32_e32 v0, 0x7ffffff, v0 3244; GFX9-NEXT: 
s_waitcnt lgkmcnt(0) 3245; GFX9-NEXT: global_store_dword v2, v0, s[0:1] 3246; GFX9-NEXT: v_and_b32_e32 v0, 0x7ffffff, v1 3247; GFX9-NEXT: global_store_dword v2, v0, s[2:3] 3248; GFX9-NEXT: s_endpgm 3249; 3250; GFX10-LABEL: sdivrem_i27: 3251; GFX10: ; %bb.0: 3252; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x10 3253; GFX10-NEXT: s_waitcnt lgkmcnt(0) 3254; GFX10-NEXT: s_bfe_i32 s1, s1, 0x1b0000 3255; GFX10-NEXT: s_bfe_i32 s0, s0, 0x1b0000 3256; GFX10-NEXT: s_ashr_i32 s4, s1, 31 3257; GFX10-NEXT: s_ashr_i32 s5, s0, 31 3258; GFX10-NEXT: s_add_i32 s1, s1, s4 3259; GFX10-NEXT: s_add_i32 s0, s0, s5 3260; GFX10-NEXT: s_xor_b32 s1, s1, s4 3261; GFX10-NEXT: s_xor_b32 s0, s0, s5 3262; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s1 3263; GFX10-NEXT: s_sub_i32 s2, 0, s1 3264; GFX10-NEXT: s_xor_b32 s4, s5, s4 3265; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 3266; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 3267; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 3268; GFX10-NEXT: v_mul_lo_u32 v1, s2, v0 3269; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 3270; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 3271; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0 3272; GFX10-NEXT: v_mul_lo_u32 v1, v0, s1 3273; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 3274; GFX10-NEXT: v_sub_nc_u32_e32 v1, s0, v1 3275; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s1, v1 3276; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v1 3277; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 3278; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 3279; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 3280; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v1 3281; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s1, v1 3282; GFX10-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 3283; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 3284; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 3285; GFX10-NEXT: v_mov_b32_e32 v2, 0 3286; GFX10-NEXT: v_xor_b32_e32 v0, s4, v0 3287; GFX10-NEXT: v_xor_b32_e32 v1, s5, v1 3288; GFX10-NEXT: v_subrev_nc_u32_e32 v0, s4, v0 3289; GFX10-NEXT: v_subrev_nc_u32_e32 v1, s5, v1 3290; GFX10-NEXT: 
v_and_b32_e32 v0, 0x7ffffff, v0 3291; GFX10-NEXT: v_and_b32_e32 v1, 0x7ffffff, v1 3292; GFX10-NEXT: s_waitcnt lgkmcnt(0) 3293; GFX10-NEXT: global_store_dword v2, v0, s[0:1] 3294; GFX10-NEXT: global_store_dword v2, v1, s[2:3] 3295; GFX10-NEXT: s_endpgm 3296 %div = sdiv i27 %x, %y 3297 store i27 %div, ptr addrspace(1) %out0 3298 %rem = srem i27 %x, %y 3299 store i27 %rem, ptr addrspace(1) %out1 3300 ret void 3301} 3302