; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,GISEL %s
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,CGP %s

; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
;
; How the check prefixes map onto the two llc invocations above
;   GISEL  used only by the invocation passing -amdgpu-codegenprepare-disable-idiv-expansion=1
;   CGP    used only by the invocation passing -amdgpu-codegenprepare-disable-idiv-expansion=0
;   CHECK  used by both invocations
; A function whose body carries only the shared prefix is expected to produce
; the same code under both invocations.

; Scalar i32 sdiv with both operands taken from ordinary function arguments.
; Each llc invocation has its own check block for this function.
define i32 @v_sdiv_i32(i32 %num, i32 %den) {
; GISEL-LABEL: v_sdiv_i32:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v1
; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v1
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4
; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4
; GISEL-NEXT: v_mul_lo_u32 v5, v4, v1
; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v4
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v0, v1
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v4
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
; GISEL-NEXT: v_xor_b32_e32 v1, v2, v3
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v1
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_sdiv_i32:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; CGP-NEXT: v_ashrrev_i32_e32 v3, 31, v1
; CGP-NEXT: v_xor_b32_e32 v4, v2, v3
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v3
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_xor_b32_e32 v1, v1, v3
; CGP-NEXT: v_cvt_f32_u32_e32 v2, v1
; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; CGP-NEXT: v_rcp_f32_e32 v2, v2
; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
; CGP-NEXT: v_mul_lo_u32 v3, v3, v2
; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CGP-NEXT: v_mul_hi_u32 v2, v0, v2
; CGP-NEXT: v_mul_lo_u32 v3, v2, v1
; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v1
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v2
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; CGP-NEXT: s_setpc_b64 s[30:31]
  %result = sdiv i32 %num, %den
  ret i32 %result
}

; FIXME: This is a workaround for not handling uniform VGPR case.
declare i32 @llvm.amdgcn.readfirstlane(i32)

; amdgpu_ps variant of the scalar i32 sdiv, with both operands marked inreg.
; The quotient is routed through llvm.amdgcn.readfirstlane before it is
; returned, which is the workaround the FIXME above refers to.
define amdgpu_ps i32 @s_sdiv_i32(i32 inreg %num, i32 inreg %den) {
; GISEL-LABEL: s_sdiv_i32:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_ashr_i32 s2, s0, 31
; GISEL-NEXT: s_ashr_i32 s3, s1, 31
; GISEL-NEXT: s_add_i32 s0, s0, s2
; GISEL-NEXT: s_add_i32 s1, s1, s3
; GISEL-NEXT: s_xor_b32 s0, s0, s2
; GISEL-NEXT: s_xor_b32 s4, s1, s3
; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s4
; GISEL-NEXT: s_sub_i32 s1, 0, s4
; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0
; GISEL-NEXT: v_mul_lo_u32 v1, s1, v0
; GISEL-NEXT: v_mul_hi_u32 v1, v0, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT: v_mul_hi_u32 v0, s0, v0
; GISEL-NEXT: v_mul_lo_u32 v1, v0, s4
; GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v0
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, s0, v1
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-NEXT: v_subrev_i32_e64 v2, s[0:1], s4, v1
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v0
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-NEXT: s_xor_b32 s0, s2, s3
; GISEL-NEXT: v_xor_b32_e32 v0, s0, v0
; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, s0, v0
; GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GISEL-NEXT: ; return to shader part epilog
;
; CGP-LABEL: s_sdiv_i32:
; CGP: ; %bb.0:
; CGP-NEXT: s_ashr_i32 s2, s0, 31
; CGP-NEXT: s_ashr_i32 s3, s1, 31
; CGP-NEXT: s_xor_b32 s4, s2, s3
; CGP-NEXT: s_add_i32 s0, s0, s2
; CGP-NEXT: s_add_i32 s1, s1, s3
; CGP-NEXT: s_xor_b32 s0, s0, s2
; CGP-NEXT: s_xor_b32 s2, s1, s3
; CGP-NEXT: v_cvt_f32_u32_e32 v0, s2
; CGP-NEXT: s_sub_i32 s1, 0, s2
; CGP-NEXT: v_rcp_f32_e32 v0, v0
; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
; CGP-NEXT: v_mul_lo_u32 v1, s1, v0
; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CGP-NEXT: v_mul_hi_u32 v0, s0, v0
; CGP-NEXT: v_mul_lo_u32 v1, v0, s2
; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0
; CGP-NEXT: v_sub_i32_e32 v1, vcc, s0, v1
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s2, v1
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CGP-NEXT: v_subrev_i32_e64 v2, s[0:1], s2, v1
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s2, v1
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CGP-NEXT: v_xor_b32_e32 v0, s4, v0
; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s4, v0
; CGP-NEXT: v_readfirstlane_b32 s0, v0
; CGP-NEXT: ; return to shader part epilog
  %result = sdiv i32 %num, %den
  %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %result)
  ret i32 %readlane
}

; <2 x i32> sdiv with both operands taken from function arguments.
; Each llc invocation has its own check block for this function.
define <2 x i32> @v_sdiv_v2i32(<2 x i32> %num, <2 x i32> %den) {
; GISEL-LABEL: v_sdiv_v2i32:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v0
; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v2
; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v1
; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v5
; GISEL-NEXT: v_xor_b32_e32 v8, v4, v5
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v6
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
; GISEL-NEXT: v_xor_b32_e32 v9, v6, v7
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
; GISEL-NEXT: v_xor_b32_e32 v2, v2, v5
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2
; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3
; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4
; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6
; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7
; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4
; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5
; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2
; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v4
; GISEL-NEXT: v_mul_lo_u32 v10, v5, v3
; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v5
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v10
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], v0, v2
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3
; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[4:5]
; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v4
; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v7, vcc
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v8
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v9
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v8
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v9
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_sdiv_v2i32:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v0
; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v2
; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v1
; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3
; CGP-NEXT: v_xor_b32_e32 v8, v4, v5
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5
; CGP-NEXT: v_xor_b32_e32 v9, v6, v7
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v6
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7
; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
; CGP-NEXT: v_xor_b32_e32 v2, v2, v5
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
; CGP-NEXT: v_xor_b32_e32 v3, v3, v7
; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2
; CGP-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3
; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
; CGP-NEXT: v_rcp_f32_e32 v4, v4
; CGP-NEXT: v_rcp_f32_e32 v6, v6
; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT: v_mul_lo_u32 v5, v5, v4
; CGP-NEXT: v_mul_lo_u32 v7, v7, v6
; CGP-NEXT: v_mul_hi_u32 v5, v4, v5
; CGP-NEXT: v_mul_hi_u32 v7, v6, v7
; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v7
; CGP-NEXT: v_mul_hi_u32 v4, v0, v4
; CGP-NEXT: v_mul_hi_u32 v5, v1, v5
; CGP-NEXT: v_mul_lo_u32 v6, v4, v2
; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v4
; CGP-NEXT: v_mul_lo_u32 v10, v5, v3
; CGP-NEXT: v_add_i32_e32 v11, vcc, 1, v5
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v10
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
; CGP-NEXT: v_sub_i32_e64 v6, s[4:5], v0, v2
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3
; CGP-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[4:5]
; CGP-NEXT: v_sub_i32_e64 v7, s[6:7], v1, v3
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v4
; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v5
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT: v_cndmask_b32_e32 v1, v5, v7, vcc
; CGP-NEXT: v_xor_b32_e32 v0, v0, v8
; CGP-NEXT: v_xor_b32_e32 v1, v1, v9
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v8
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v9
; CGP-NEXT: s_setpc_b64 s[30:31]
  %result = sdiv <2 x i32> %num, %den
  ret <2 x i32> %result
}

; i32 sdiv by the constant 4096. Both llc invocations share one check block,
; and the expected code consists of shifts and one add.
define i32 @v_sdiv_i32_pow2k_denom(i32 %num) {
; CHECK-LABEL: v_sdiv_i32_pow2k_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; CHECK-NEXT: v_lshrrev_b32_e32 v1, 20, v1
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: v_ashrrev_i32_e32 v0, 12, v0
; CHECK-NEXT: s_setpc_b64 s[30:31]
  %result = sdiv i32 %num, 4096
  ret i32 %result
}

; <2 x i32> sdiv by a splat of the constant 4096. Both llc invocations share
; one check block, which repeats the shift and add sequence once per element.
define <2 x i32> @v_sdiv_v2i32_pow2k_denom(<2 x i32> %num) {
; CHECK-LABEL: v_sdiv_v2i32_pow2k_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1
; CHECK-NEXT: v_lshrrev_b32_e32 v2, 20, v2
; CHECK-NEXT: v_lshrrev_b32_e32 v3, 20, v3
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v3
; CHECK-NEXT: v_ashrrev_i32_e32 v0, 12, v0
; CHECK-NEXT: v_ashrrev_i32_e32 v1, 12, v1
; CHECK-NEXT: s_setpc_b64 s[30:31]
  %result = sdiv <2 x i32> %num, <i32 4096, i32 4096>
  ret <2 x i32> %result
}

; i32 sdiv by the odd constant 1235195, which appears as 0x12d8fb in the
; expected code. Both llc invocations share one check block.
define i32 @v_sdiv_i32_oddk_denom(i32 %num) {
; CHECK-LABEL: v_sdiv_i32_oddk_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, 0x4996c7d8
; CHECK-NEXT: v_mov_b32_e32 v3, 0xffed2705
; CHECK-NEXT: v_mov_b32_e32 v4, 0x12d8fb
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
; CHECK-NEXT: v_mul_lo_u32 v3, v2, v3
; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
; CHECK-NEXT: v_mul_lo_u32 v3, v2, v4
; CHECK-NEXT: v_add_i32_e32 v5, vcc, 1, v2
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4
; CHECK-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5]
; CHECK-NEXT: v_add_i32_e32 v3, vcc, 0xffed2705, v0
; CHECK-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v2
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: s_setpc_b64 s[30:31]
  %result = sdiv i32 %num, 1235195
  ret i32 %result
}

; <2 x i32> sdiv by a splat of the constant 1235195. Unlike the scalar case
; above, each llc invocation has its own check block here.
define <2 x i32> @v_sdiv_v2i32_oddk_denom(<2 x i32> %num) {
; GISEL-LABEL: v_sdiv_v2i32_oddk_denom:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; GISEL-NEXT: v_mov_b32_e32 v3, 0x12d8fb
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb
; GISEL-NEXT: v_mov_b32_e32 v5, 0xffed2705
; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v6
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
; GISEL-NEXT: v_mul_lo_u32 v7, v4, v5
; GISEL-NEXT: v_mul_hi_u32 v7, v4, v7
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7
; GISEL-NEXT: v_mul_hi_u32 v7, v0, v4
; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4
; GISEL-NEXT: v_mul_lo_u32 v8, v7, v3
; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v7
; GISEL-NEXT: v_mul_lo_u32 v10, v4, v3
; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v4
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v8
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v10
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3
; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[4:5]
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v0, v5
; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v1, v3
; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v11, s[6:7]
; GISEL-NEXT: v_add_i32_e32 v8, vcc, 0xffed2705, v1
; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v5, s[4:5]
; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v7
; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[6:7]
; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v4
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
; GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v5, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v8, vcc
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_sdiv_v2i32_oddk_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; CGP-NEXT: v_rcp_iflag_f32_e32 v3, 0x4996c7d8
; CGP-NEXT: v_mov_b32_e32 v4, 0xffed2705
; CGP-NEXT: v_mov_b32_e32 v5, 0x12d8fb
; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v1
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v6
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
; CGP-NEXT: v_mul_lo_u32 v7, v3, v4
; CGP-NEXT: v_mul_hi_u32 v7, v3, v7
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7
; CGP-NEXT: v_mul_hi_u32 v7, v0, v3
; CGP-NEXT: v_mul_hi_u32 v3, v1, v3
; CGP-NEXT: v_mul_lo_u32 v8, v7, v5
; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v7
; CGP-NEXT: v_mul_lo_u32 v10, v3, v5
; CGP-NEXT: v_add_i32_e32 v11, vcc, 1, v3
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v8
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v10
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
; CGP-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[4:5]
; CGP-NEXT: v_add_i32_e32 v4, vcc, v0, v4
; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v1, v5
; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[6:7]
; CGP-NEXT: v_add_i32_e32 v8, vcc, 0xffed2705, v1
; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5]
; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v7
; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[6:7]
; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v3
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
; CGP-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
; CGP-NEXT: s_setpc_b64 s[30:31]
  %result = sdiv <2 x i32> %num, <i32 1235195, i32 1235195>
  ret <2 x i32> %result
}

; i32 sdiv whose divisor is 4096 shifted left by the variable %y, so the
; divisor is not a compile-time constant. Both llc invocations share one
; check block.
define i32 @v_sdiv_i32_pow2_shl_denom(i32 %x, i32 %y) {
; CHECK-LABEL: v_sdiv_i32_pow2_shl_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_lshl_b32_e32 v1, 0x1000, v1
; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v3
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2
; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3
; CHECK-NEXT: v_cvt_f32_u32_e32 v4, v1
; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v1
; CHECK-NEXT: v_rcp_iflag_f32_e32 v4, v4
; CHECK-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4
; CHECK-NEXT: v_mul_lo_u32 v5, v5, v4
; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; CHECK-NEXT: v_mul_hi_u32 v4, v0, v4
; CHECK-NEXT: v_mul_lo_u32 v5, v4, v1
; CHECK-NEXT: v_add_i32_e32 v6, vcc, 1, v4
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
; CHECK-NEXT: v_sub_i32_e64 v5, s[4:5], v0, v1
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; CHECK-NEXT: v_add_i32_e32 v5, vcc, 1, v4
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
; CHECK-NEXT: v_xor_b32_e32 v1, v2, v3
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: s_setpc_b64 s[30:31]
  %shl.y = shl i32 4096, %y
  %r = sdiv i32 %x, %shl.y
  ret i32 %r
}

; <2 x i32> sdiv whose divisor is a splat of 4096 shifted left elementwise by
; the variable %y. Each llc invocation has its own check block.
define <2 x i32> @v_sdiv_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
; GISEL-LABEL: v_sdiv_v2i32_pow2_shl_denom:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_lshl_b32_e32 v2, 0x1000, v2
; GISEL-NEXT: v_lshl_b32_e32 v3, 0x1000, v3
; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v0
; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v1
; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v2
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
; GISEL-NEXT: v_xor_b32_e32 v4, v4, v6
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
; GISEL-NEXT: v_xor_b32_e32 v5, v5, v7
; GISEL-NEXT: v_xor_b32_e32 v2, v2, v6
; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7
; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v2
; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v2
; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v3
; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8
; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; GISEL-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6
; GISEL-NEXT: v_mul_lo_u32 v9, v9, v8
; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7
; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v9
; GISEL-NEXT: v_mul_hi_u32 v6, v0, v6
; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
; GISEL-NEXT: v_mul_lo_u32 v8, v6, v2
; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v6
; GISEL-NEXT: v_mul_lo_u32 v10, v7, v3
; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v7
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v8
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v10
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v9, vcc
; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], v0, v2
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3
; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v11, s[4:5]
; GISEL-NEXT: v_sub_i32_e64 v9, s[6:7], v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v6
; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5]
; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v7
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v9, vcc
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_sdiv_v2i32_pow2_shl_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: v_lshl_b32_e32 v2, 0x1000, v2
; CGP-NEXT: v_lshl_b32_e32 v3, 0x1000, v3
; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v0
; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1
; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v2
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4
; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5
; CGP-NEXT: v_xor_b32_e32 v8, v4, v6
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
; CGP-NEXT: v_xor_b32_e32 v4, v5, v7
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7
; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
; CGP-NEXT: v_xor_b32_e32 v2, v2, v6
; CGP-NEXT: v_xor_b32_e32 v3, v3, v7
; CGP-NEXT: v_cvt_f32_u32_e32 v5, v2
; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v2
; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3
; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
; CGP-NEXT: v_rcp_f32_e32 v5, v5
; CGP-NEXT: v_rcp_f32_e32 v7, v7
; CGP-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5
; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7
; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
; CGP-NEXT: v_mul_lo_u32 v6, v6, v5
; CGP-NEXT: v_mul_lo_u32 v9, v9, v7
; CGP-NEXT: v_mul_hi_u32 v6, v5, v6
; CGP-NEXT: v_mul_hi_u32 v9, v7, v9
; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v9
; CGP-NEXT: v_mul_hi_u32 v5, v0, v5
; CGP-NEXT: v_mul_hi_u32 v6, v1, v6
; CGP-NEXT: v_mul_lo_u32 v7, v5, v2
; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v5
; CGP-NEXT: v_mul_lo_u32 v10, v6, v3
; CGP-NEXT: v_add_i32_e32 v11, vcc, 1, v6
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v10
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc
; CGP-NEXT: v_sub_i32_e64 v7, s[4:5], v0, v2
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3
; CGP-NEXT: v_cndmask_b32_e64 v6, v6, v11, s[4:5]
; CGP-NEXT: v_sub_i32_e64 v9, s[6:7], v1, v3
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v5
; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5]
; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v6
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e32 v0, v5, v7, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT: v_cndmask_b32_e32 v1, v6, v9, vcc
; CGP-NEXT: v_xor_b32_e32 v0, v0, v8
; CGP-NEXT: v_xor_b32_e32 v1, v1, v4
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v8
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
; CGP-NEXT: s_setpc_b64 s[30:31]
  %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
  %r = sdiv <2 x i32> %x, %shl.y
  ret <2 x i32> %r
}

; i32 sdiv where both operands are first masked with 16777215 (0xffffff), so
; only their low 24 bits can be set. Each llc invocation has its own check
; block, and neither block contains the ashr/xor sign handling seen in
; v_sdiv_i32.
define i32 @v_sdiv_i32_24bit(i32 %num, i32 %den) {
; GISEL-LABEL: v_sdiv_i32_24bit:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0
; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1
; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1
; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2
; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2
; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1
; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v2
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v1
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v2
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_sdiv_i32_24bit:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0
; CGP-NEXT: v_and_b32_e32 v1, 0xffffff, v1
; CGP-NEXT: v_cvt_f32_u32_e32 v2, v1
; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; CGP-NEXT: v_rcp_f32_e32 v2, v2
; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
; CGP-NEXT: v_mul_lo_u32 v3, v3, v2
; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CGP-NEXT: v_mul_hi_u32 v2, v0, v2
; CGP-NEXT: v_mul_lo_u32 v3, v2, v1
; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v1
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v2
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
; CGP-NEXT: s_setpc_b64 s[30:31]
  %num.mask = and i32 %num, 16777215
  %den.mask = and i32 %den, 16777215
  %result = sdiv i32 %num.mask, %den.mask
  ret i32 %result
}

; <2 x i32> sdiv where both operands are first masked elementwise with
; 16777215 (0xffffff). Each llc invocation has its own check block.
define <2 x i32> @v_sdiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
; GISEL-LABEL: v_sdiv_v2i32_24bit:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0
; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1
; GISEL-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; GISEL-NEXT: v_and_b32_e32 v3, 0xffffff, v3
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2
; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3
; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4
; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6
; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7
; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4
; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5
; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2
; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v4
; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3
; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v5
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v8
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], v0, v2
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3
; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v9, s[4:5]
; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v4
; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v7, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_sdiv_v2i32_24bit:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0
; CGP-NEXT: v_and_b32_e32 v1, 0xffffff, v1
; CGP-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; CGP-NEXT: v_and_b32_e32 v3, 0xffffff, v3
; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2
; CGP-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3
; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
; CGP-NEXT: v_rcp_f32_e32 v4, v4
; CGP-NEXT: v_rcp_f32_e32 v6, v6
; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT: v_mul_lo_u32 v5, v5, v4
; CGP-NEXT: v_mul_lo_u32 v7, v7, v6
; CGP-NEXT: v_mul_hi_u32 v5, v4, v5
; CGP-NEXT: v_mul_hi_u32 v7, v6, v7
; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v7
; CGP-NEXT: v_mul_hi_u32 v4, v0, v4
; CGP-NEXT: v_mul_hi_u32 v5, v1, v5
; CGP-NEXT: v_mul_lo_u32 v6, v4, v2
; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v4
; CGP-NEXT: v_mul_lo_u32 v8, v5, v3
; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v5
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v8
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
; CGP-NEXT: v_sub_i32_e64 v6, s[4:5], v0, v2
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3
; CGP-NEXT: v_cndmask_b32_e64 v5, v5, v9, s[4:5]
; CGP-NEXT: v_sub_i32_e64 v7, s[6:7], v1, v3
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v4
; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v5
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT: v_cndmask_b32_e32 v1, v5, v7, vcc
; CGP-NEXT: s_setpc_b64 s[30:31]
  %num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215>
  %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215>
  %result = sdiv <2 x i32> %num.mask, %den.mask
  ret <2 x i32> %result
}

; i32 sdiv carrying the exact flag, dividing by the constant 4096. Both llc
; invocations share one check block, which expects a single arithmetic shift
; right by 12.
define i32 @v_sdiv_i32_exact(i32 %num) {
; CHECK-LABEL: v_sdiv_i32_exact:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_ashrrev_i32_e32 v0, 12, v0
; CHECK-NEXT: s_setpc_b64 s[30:31]
  %result = sdiv exact i32 %num, 4096
  ret i32 %result
}

; <2 x i32> sdiv carrying the exact flag, dividing by the non-splat constant
; <4096, 1024>. The shared check block expects one arithmetic shift right per
; element, by 12 and by 10 respectively.
define <2 x i32> @v_sdiv_v2i32_exact(<2 x i32> %num) {
; CHECK-LABEL: v_sdiv_v2i32_exact:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_ashrrev_i32_e32 v0, 12, v0
; CHECK-NEXT: v_ashrrev_i32_e32 v1, 10, v1
; CHECK-NEXT: s_setpc_b64 s[30:31]
  %result = sdiv exact <2 x i32> %num, <i32 4096, i32 1024>
  ret <2 x i32> %result
}