; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,GISEL %s
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,CGP %s

; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
;
; The GISEL prefix checks the llc invocation that disables the
; AMDGPUCodeGenPrepare idiv expansion (flag value 1); the CGP prefix checks the
; invocation that leaves it enabled (flag value 0). Functions whose output is
; identical in both invocations are checked once under the common CHECK prefix.

; srem of two i32 function arguments. In the assertions below the two
; invocations differ only in the reciprocal instruction (v_rcp_iflag_f32 for
; the GISEL prefix, v_rcp_f32 for the CGP prefix).
define i32 @v_srem_i32(i32 %num, i32 %den) {
; GISEL-LABEL: v_srem_i32:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
; GISEL-NEXT: v_cvt_f32_u32_e32 v3, v1
; GISEL-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
; GISEL-NEXT: v_rcp_iflag_f32_e32 v3, v3
; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3
; GISEL-NEXT: v_mul_lo_u32 v4, v4, v3
; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4
; GISEL-NEXT: v_mul_hi_u32 v3, v0, v3
; GISEL-NEXT: v_mul_lo_u32 v3, v3, v1
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_i32:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; CGP-NEXT: v_ashrrev_i32_e32 v3, 31, v1
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v3
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_xor_b32_e32 v1, v1, v3
; CGP-NEXT: v_cvt_f32_u32_e32 v3, v1
; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
; CGP-NEXT: v_rcp_f32_e32 v3, v3
; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT: v_mul_lo_u32 v4, v4, v3
; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
; CGP-NEXT: v_mul_hi_u32 v3, v0, v3
; CGP-NEXT: v_mul_lo_u32 v3, v3, v1
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
; CGP-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: s_setpc_b64 s[30:31]
  %result = srem i32 %num, %den
  ret i32 %result
}

; FIXME: This is a workaround for not handling uniform VGPR case.
declare i32 @llvm.amdgcn.readfirstlane(i32)

; srem of two inreg i32 arguments in an amdgpu_ps function. The remainder is
; passed through llvm.amdgcn.readfirstlane before it is returned; the
; assertions end with the matching v_readfirstlane_b32 into s0.
define amdgpu_ps i32 @s_srem_i32(i32 inreg %num, i32 inreg %den) {
; GISEL-LABEL: s_srem_i32:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_ashr_i32 s2, s0, 31
; GISEL-NEXT: s_ashr_i32 s3, s1, 31
; GISEL-NEXT: s_add_i32 s0, s0, s2
; GISEL-NEXT: s_add_i32 s1, s1, s3
; GISEL-NEXT: s_xor_b32 s0, s0, s2
; GISEL-NEXT: s_xor_b32 s1, s1, s3
; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s1
; GISEL-NEXT: s_sub_i32 s3, 0, s1
; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0
; GISEL-NEXT: v_mul_lo_u32 v1, s3, v0
; GISEL-NEXT: v_mul_hi_u32 v1, v0, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT: v_mul_hi_u32 v0, s0, v0
; GISEL-NEXT: v_mul_lo_u32 v0, v0, s1
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
; GISEL-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GISEL-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GISEL-NEXT: v_xor_b32_e32 v0, s2, v0
; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, s2, v0
; GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GISEL-NEXT: ; return to shader part epilog
;
; CGP-LABEL: s_srem_i32:
; CGP: ; %bb.0:
; CGP-NEXT: s_ashr_i32 s2, s0, 31
; CGP-NEXT: s_ashr_i32 s3, s1, 31
; CGP-NEXT: s_add_i32 s0, s0, s2
; CGP-NEXT: s_add_i32 s1, s1, s3
; CGP-NEXT: s_xor_b32 s0, s0, s2
; CGP-NEXT: s_xor_b32 s1, s1, s3
; CGP-NEXT: v_cvt_f32_u32_e32 v0, s1
; CGP-NEXT: s_sub_i32 s3, 0, s1
; CGP-NEXT: v_rcp_f32_e32 v0, v0
; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
; CGP-NEXT: v_mul_lo_u32 v1, s3, v0
; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CGP-NEXT: v_mul_hi_u32 v0, s0, v0
; CGP-NEXT: v_mul_lo_u32 v0, v0, s1
; CGP-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
; CGP-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CGP-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; CGP-NEXT: v_xor_b32_e32 v0, s2, v0
; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s2, v0
; CGP-NEXT: v_readfirstlane_b32 s0, v0
; CGP-NEXT: ; return to shader part epilog
  %result = srem i32 %num, %den
  %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %result)
  ret i32 %readlane
}

; Element-wise srem of two <2 x i32> function arguments.
define <2 x i32> @v_srem_v2i32(<2 x i32> %num, <2 x i32> %den) {
; GISEL-LABEL: v_srem_v2i32:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v0
; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v2
; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v1
; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v5
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v6
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
; GISEL-NEXT: v_xor_b32_e32 v2, v2, v5
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7
; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v2
; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v2
; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v3
; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8
; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5
; GISEL-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
; GISEL-NEXT: v_mul_lo_u32 v7, v7, v5
; GISEL-NEXT: v_mul_lo_u32 v9, v9, v8
; GISEL-NEXT: v_mul_hi_u32 v7, v5, v7
; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v9
; GISEL-NEXT: v_mul_hi_u32 v5, v0, v5
; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
; GISEL-NEXT: v_mul_lo_u32 v5, v5, v2
; GISEL-NEXT: v_mul_lo_u32 v7, v7, v3
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_v2i32:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v0
; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v2
; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v1
; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v6
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7
; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
; CGP-NEXT: v_xor_b32_e32 v2, v2, v5
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
; CGP-NEXT: v_xor_b32_e32 v3, v3, v7
; CGP-NEXT: v_cvt_f32_u32_e32 v5, v2
; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v2
; CGP-NEXT: v_cvt_f32_u32_e32 v8, v3
; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
; CGP-NEXT: v_rcp_f32_e32 v5, v5
; CGP-NEXT: v_rcp_f32_e32 v8, v8
; CGP-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5
; CGP-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8
; CGP-NEXT: v_mul_lo_u32 v7, v7, v5
; CGP-NEXT: v_mul_lo_u32 v9, v9, v8
; CGP-NEXT: v_mul_hi_u32 v7, v5, v7
; CGP-NEXT: v_mul_hi_u32 v9, v8, v9
; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7
; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v9
; CGP-NEXT: v_mul_hi_u32 v5, v0, v5
; CGP-NEXT: v_mul_hi_u32 v7, v1, v7
; CGP-NEXT: v_mul_lo_u32 v5, v5, v2
; CGP-NEXT: v_mul_lo_u32 v7, v7, v3
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
; CGP-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
; CGP-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; CGP-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
; CGP-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
; CGP-NEXT: s_setpc_b64 s[30:31]
  %result = srem <2 x i32> %num, %den
  ret <2 x i32> %result
}

; srem of an i32 argument by the constant 4096 (0x1000). Both invocations are
; checked under the common CHECK prefix; the multiply-back step appears as a
; shift left by 12.
define i32 @v_srem_i32_pow2k_denom(i32 %num) {
; CHECK-LABEL: v_srem_i32_pow2k_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, 0x45800000
; CHECK-NEXT: v_mov_b32_e32 v3, 0xfffff000
; CHECK-NEXT: v_mov_b32_e32 v4, 0x1000
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
; CHECK-NEXT: v_mul_lo_u32 v3, v2, v3
; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 12, v2
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0xfffff000, v0
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0xfffff000, v0
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: s_setpc_b64 s[30:31]
  %result = srem i32 %num, 4096
  ret i32 %result
}

; Element-wise srem of a <2 x i32> argument by a splat of 4096.
define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) {
; GISEL-LABEL: v_srem_v2i32_pow2k_denom:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; GISEL-NEXT: v_mov_b32_e32 v3, 0x1000
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, 0x1000
; GISEL-NEXT: v_mov_b32_e32 v5, 0xfffff000
; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v6
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
; GISEL-NEXT: v_mul_lo_u32 v7, v4, v5
; GISEL-NEXT: v_mul_hi_u32 v7, v4, v7
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7
; GISEL-NEXT: v_mul_hi_u32 v7, v0, v4
; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4
; GISEL-NEXT: v_lshlrev_b32_e32 v7, 12, v7
; GISEL-NEXT: v_lshlrev_b32_e32 v4, 12, v4
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v0, v5
; GISEL-NEXT: v_add_i32_e32 v7, vcc, 0xfffff000, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v0, v5
; GISEL-NEXT: v_add_i32_e32 v5, vcc, 0xfffff000, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_v2i32_pow2k_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; CGP-NEXT: v_rcp_iflag_f32_e32 v3, 0x45800000
; CGP-NEXT: v_mov_b32_e32 v4, 0xfffff000
; CGP-NEXT: v_mov_b32_e32 v5, 0x1000
; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v1
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v6
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
; CGP-NEXT: v_mul_lo_u32 v7, v3, v4
; CGP-NEXT: v_mul_hi_u32 v7, v3, v7
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7
; CGP-NEXT: v_mul_hi_u32 v7, v0, v3
; CGP-NEXT: v_mul_hi_u32 v3, v1, v3
; CGP-NEXT: v_lshlrev_b32_e32 v7, 12, v7
; CGP-NEXT: v_lshlrev_b32_e32 v3, 12, v3
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
; CGP-NEXT: v_add_i32_e32 v3, vcc, v0, v4
; CGP-NEXT: v_add_i32_e32 v7, vcc, 0xfffff000, v1
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; CGP-NEXT: v_add_i32_e32 v3, vcc, v0, v4
; CGP-NEXT: v_add_i32_e32 v4, vcc, 0xfffff000, v1
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
; CGP-NEXT: s_setpc_b64 s[30:31]
  %result = srem <2 x i32> %num, <i32 4096, i32 4096>
  ret <2 x i32> %result
}

; srem of an i32 argument by the odd constant 1235195 (0x12d8fb). Both
; invocations are checked under the common CHECK prefix.
define i32 @v_srem_i32_oddk_denom(i32 %num) {
; CHECK-LABEL: v_srem_i32_oddk_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, 0x4996c7d8
; CHECK-NEXT: v_mov_b32_e32 v3, 0xffed2705
; CHECK-NEXT: v_mov_b32_e32 v4, 0x12d8fb
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
; CHECK-NEXT: v_mul_lo_u32 v3, v2, v3
; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
; CHECK-NEXT: v_mul_lo_u32 v2, v2, v4
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0xffed2705, v0
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0xffed2705, v0
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: s_setpc_b64 s[30:31]
  %result = srem i32 %num, 1235195
  ret i32 %result
}

; Element-wise srem of a <2 x i32> argument by a splat of 1235195.
define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) {
; GISEL-LABEL: v_srem_v2i32_oddk_denom:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; GISEL-NEXT: v_mov_b32_e32 v3, 0x12d8fb
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb
; GISEL-NEXT: v_mov_b32_e32 v5, 0xffed2705
; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v1
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v6
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
; GISEL-NEXT: v_mul_lo_u32 v7, v4, v5
; GISEL-NEXT: v_mul_hi_u32 v7, v4, v7
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7
; GISEL-NEXT: v_mul_hi_u32 v7, v0, v4
; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4
; GISEL-NEXT: v_mul_lo_u32 v7, v7, v3
; GISEL-NEXT: v_mul_lo_u32 v4, v4, v3
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v0, v5
; GISEL-NEXT: v_add_i32_e32 v7, vcc, 0xffed2705, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v0, v5
; GISEL-NEXT: v_add_i32_e32 v5, vcc, 0xffed2705, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_v2i32_oddk_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; CGP-NEXT: v_rcp_iflag_f32_e32 v3, 0x4996c7d8
; CGP-NEXT: v_mov_b32_e32 v4, 0xffed2705
; CGP-NEXT: v_mov_b32_e32 v5, 0x12d8fb
; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v1
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v6
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
; CGP-NEXT: v_mul_lo_u32 v7, v3, v4
; CGP-NEXT: v_mul_hi_u32 v7, v3, v7
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7
; CGP-NEXT: v_mul_hi_u32 v7, v0, v3
; CGP-NEXT: v_mul_hi_u32 v3, v1, v3
; CGP-NEXT: v_mul_lo_u32 v7, v7, v5
; CGP-NEXT: v_mul_lo_u32 v3, v3, v5
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
; CGP-NEXT: v_add_i32_e32 v3, vcc, v0, v4
; CGP-NEXT: v_add_i32_e32 v7, vcc, 0xffed2705, v1
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; CGP-NEXT: v_add_i32_e32 v3, vcc, v0, v4
; CGP-NEXT: v_add_i32_e32 v4, vcc, 0xffed2705, v1
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
; CGP-NEXT: s_setpc_b64 s[30:31]
  %result = srem <2 x i32> %num, <i32 1235195, i32 1235195>
  ret <2 x i32> %result
}

; srem by a non-constant denominator formed as 4096 shifted left by %y. Both
; invocations are checked under the common CHECK prefix.
define i32 @v_srem_i32_pow2_shl_denom(i32 %x, i32 %y) {
; CHECK-LABEL: v_srem_i32_pow2_shl_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_lshl_b32_e32 v1, 0x1000, v1
; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v3
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2
; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3
; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v1
; CHECK-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3
; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3
; CHECK-NEXT: v_mul_lo_u32 v4, v4, v3
; CHECK-NEXT: v_mul_hi_u32 v4, v3, v4
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4
; CHECK-NEXT: v_mul_hi_u32 v3, v0, v3
; CHECK-NEXT: v_mul_lo_u32 v3, v3, v1
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT: s_setpc_b64 s[30:31]
  %shl.y = shl i32 4096, %y
  %r = srem i32 %x, %shl.y
  ret i32 %r
}

; Vector form of the variable-shift case: the denominator is a splat of 4096
; shifted left element-wise by %y.
define <2 x i32> @v_srem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
; GISEL-LABEL: v_srem_v2i32_pow2_shl_denom:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_lshl_b32_e32 v2, 0x1000, v2
; GISEL-NEXT: v_lshl_b32_e32 v3, 0x1000, v3
; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v0
; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v1
; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v2
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
; GISEL-NEXT: v_xor_b32_e32 v2, v2, v6
; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7
; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v2
; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v2
; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v3
; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8
; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; GISEL-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6
; GISEL-NEXT: v_mul_lo_u32 v9, v9, v8
; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7
; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v9
; GISEL-NEXT: v_mul_hi_u32 v6, v0, v6
; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
; GISEL-NEXT: v_mul_lo_u32 v6, v6, v2
; GISEL-NEXT: v_mul_lo_u32 v7, v7, v3
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v2
; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v2
; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_v2i32_pow2_shl_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: v_lshl_b32_e32 v2, 0x1000, v2
; CGP-NEXT: v_lshl_b32_e32 v3, 0x1000, v3
; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v0
; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1
; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v2
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4
; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7
; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
; CGP-NEXT: v_xor_b32_e32 v2, v2, v6
; CGP-NEXT: v_xor_b32_e32 v3, v3, v7
; CGP-NEXT: v_cvt_f32_u32_e32 v6, v2
; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v2
; CGP-NEXT: v_cvt_f32_u32_e32 v8, v3
; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
; CGP-NEXT: v_rcp_f32_e32 v6, v6
; CGP-NEXT: v_rcp_f32_e32 v8, v8
; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; CGP-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8
; CGP-NEXT: v_mul_lo_u32 v7, v7, v6
; CGP-NEXT: v_mul_lo_u32 v9, v9, v8
; CGP-NEXT: v_mul_hi_u32 v7, v6, v7
; CGP-NEXT: v_mul_hi_u32 v9, v8, v9
; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7
; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v9
; CGP-NEXT: v_mul_hi_u32 v6, v0, v6
; CGP-NEXT: v_mul_hi_u32 v7, v1, v7
; CGP-NEXT: v_mul_lo_u32 v6, v6, v2
; CGP-NEXT: v_mul_lo_u32 v7, v7, v3
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
; CGP-NEXT: v_sub_i32_e32 v6, vcc, v0, v2
; CGP-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; CGP-NEXT: v_sub_i32_e32 v6, vcc, v0, v2
; CGP-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
; CGP-NEXT: s_setpc_b64 s[30:31]
  %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
  %r = srem <2 x i32> %x, %shl.y
  ret <2 x i32> %r
}

; Both operands are masked to their low 24 bits (and with 16777215, i.e.
; 0xffffff) before the srem. The assertions contain no sign-handling
; (ashr/xor) instructions around the remainder computation.
define i32 @v_srem_i32_24bit(i32 %num, i32 %den) {
; GISEL-LABEL: v_srem_i32_24bit:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0
; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1
; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1
; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2
; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2
; GISEL-NEXT: v_mul_lo_u32 v2, v2, v1
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_i32_24bit:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0
; CGP-NEXT: v_and_b32_e32 v1, 0xffffff, v1
; CGP-NEXT: v_cvt_f32_u32_e32 v2, v1
; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; CGP-NEXT: v_rcp_f32_e32 v2, v2
; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
; CGP-NEXT: v_mul_lo_u32 v3, v3, v2
; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CGP-NEXT: v_mul_hi_u32 v2, v0, v2
; CGP-NEXT: v_mul_lo_u32 v2, v2, v1
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CGP-NEXT: s_setpc_b64 s[30:31]
  %num.mask = and i32 %num, 16777215
  %den.mask = and i32 %den, 16777215
  %result = srem i32 %num.mask, %den.mask
  ret i32 %result
}

; Vector form of the 24-bit case: both <2 x i32> operands are masked with a
; splat of 16777215 before the srem.
define <2 x i32> @v_srem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
; GISEL-LABEL: v_srem_v2i32_24bit:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0
; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1
; GISEL-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; GISEL-NEXT: v_and_b32_e32 v3, 0xffffff, v3
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2
; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3
; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4
; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6
; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7
; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4
; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5
; GISEL-NEXT: v_mul_lo_u32 v4, v4, v2
; GISEL-NEXT: v_mul_lo_u32 v5, v5, v3
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_v2i32_24bit:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0
; CGP-NEXT: v_and_b32_e32 v1, 0xffffff, v1
; CGP-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; CGP-NEXT: v_and_b32_e32 v3, 0xffffff, v3
; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2
; CGP-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3
; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
; CGP-NEXT: v_rcp_f32_e32 v4, v4
; CGP-NEXT: v_rcp_f32_e32 v6, v6
; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT: v_mul_lo_u32 v5, v5, v4
; CGP-NEXT: v_mul_lo_u32 v7, v7, v6
; CGP-NEXT: v_mul_hi_u32 v5, v4, v5
; CGP-NEXT: v_mul_hi_u32 v7, v6, v7
; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v7
; CGP-NEXT: v_mul_hi_u32 v4, v0, v4
; CGP-NEXT: v_mul_hi_u32 v5, v1, v5
; CGP-NEXT: v_mul_lo_u32 v4, v4, v2
; CGP-NEXT: v_mul_lo_u32 v5, v5, v3
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
; CGP-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
; CGP-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; CGP-NEXT: s_setpc_b64 s[30:31]
  %num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215>
  %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215>
  %result = srem <2 x i32> %num.mask, %den.mask
  ret <2 x i32> %result
}