; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti -amdgpu-codegenprepare-mul24=0 -amdgpu-codegenprepare-disable-idiv-expansion < %s | FileCheck -check-prefixes=SI,SI-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tahiti -amdgpu-codegenprepare-mul24=0 -amdgpu-codegenprepare-disable-idiv-expansion < %s | FileCheck -check-prefixes=SI,SI-GISEL %s

declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)

; Test computeKnownBits for umed3 node. We know the base address has a
; 0 sign bit only after umed3 is formed. The DS instruction offset can
; only be folded on SI with a positive base address.
define i32 @v_known_bits_umed3(i8 %a) {
; SI-LABEL: v_known_bits_umed3:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_and_b32_e32 v0, 0xff, v0
; SI-NEXT:    v_mov_b32_e32 v1, 0x80
; SI-NEXT:    v_med3_u32 v0, v0, 32, v1
; SI-NEXT:    s_mov_b32 m0, -1
; SI-NEXT:    ds_read_u8 v0, v0 offset:128
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_setpc_b64 s[30:31]
  ; Zero-extend %a, then clamp it to [32, 128] with a umax/umin pair; the
  ; checks above expect this pair to be selected as a single v_med3_u32.
  %ext.a = zext i8 %a to i32
  %max.a = call i32 @llvm.umax.i32(i32 %ext.a, i32 32)
  %umed3 = call i32 @llvm.umin.i32(i32 %max.a, i32 128)
  ; Use the clamped value as an addrspace(3) base pointer. The constant GEP
  ; of 128 is expected to be folded into the ds_read_u8 immediate offset.
  %cast.umed3 = inttoptr i32 %umed3 to ptr addrspace(3)
  %gep = getelementptr i8, ptr addrspace(3) %cast.umed3, i32 128
  %load = load i8, ptr addrspace(3) %gep
  %result = zext i8 %load to i32
  ret i32 %result
}

; The IR expansion of division is disabled. The division is legalized
; late, after the formation of smed3. We need to be able to
; computeNumSignBits on the smed3 in order to use the 24-bit-as-float
; sdiv legalization.
define i32 @v_known_signbits_smed3(i16 %a, i16 %b) {
; SI-SDAG-LABEL: v_known_signbits_smed3:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SDAG-NEXT:    v_bfe_i32 v1, v1, 0, 16
; SI-SDAG-NEXT:    s_movk_i32 s4, 0xffc0
; SI-SDAG-NEXT:    v_mov_b32_e32 v2, 0x80
; SI-SDAG-NEXT:    v_med3_i32 v1, v1, s4, v2
; SI-SDAG-NEXT:    v_cvt_f32_i32_e32 v2, v1
; SI-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 16
; SI-SDAG-NEXT:    s_movk_i32 s4, 0xffe0
; SI-SDAG-NEXT:    v_med3_i32 v0, v0, s4, 64
; SI-SDAG-NEXT:    v_cvt_f32_i32_e32 v3, v0
; SI-SDAG-NEXT:    v_rcp_iflag_f32_e32 v4, v2
; SI-SDAG-NEXT:    v_xor_b32_e32 v0, v0, v1
; SI-SDAG-NEXT:    v_ashrrev_i32_e32 v0, 30, v0
; SI-SDAG-NEXT:    v_or_b32_e32 v0, 1, v0
; SI-SDAG-NEXT:    v_mul_f32_e32 v1, v3, v4
; SI-SDAG-NEXT:    v_trunc_f32_e32 v1, v1
; SI-SDAG-NEXT:    v_mad_f32 v3, -v1, v2, v3
; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v1, v1
; SI-SDAG-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, |v2|
; SI-SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; SI-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
; SI-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 16
; SI-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; SI-GISEL-LABEL: v_known_signbits_smed3:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-GISEL-NEXT:    v_bfe_i32 v1, v1, 0, 16
; SI-GISEL-NEXT:    v_not_b32_e32 v2, 63
; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x80
; SI-GISEL-NEXT:    v_med3_i32 v1, v1, v2, v3
; SI-GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
; SI-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
; SI-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v2
; SI-GISEL-NEXT:    v_cvt_f32_u32_e32 v3, v1
; SI-GISEL-NEXT:    v_sub_i32_e32 v5, vcc, 0, v1
; SI-GISEL-NEXT:    v_not_b32_e32 v4, 31
; SI-GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
; SI-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
; SI-GISEL-NEXT:    v_med3_i32 v0, v0, v4, 64
; SI-GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
; SI-GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
; SI-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
; SI-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
; SI-GISEL-NEXT:    v_mul_lo_u32 v5, v5, v3
; SI-GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
; SI-GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
; SI-GISEL-NEXT:    v_mul_hi_u32 v3, v0, v3
; SI-GISEL-NEXT:    v_mul_lo_u32 v5, v3, v1
; SI-GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v3
; SI-GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
; SI-GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; SI-GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
; SI-GISEL-NEXT:    v_sub_i32_e64 v5, s[4:5], v0, v1
; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; SI-GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
; SI-GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v3, v5, vcc
; SI-GISEL-NEXT:    v_xor_b32_e32 v1, v4, v2
; SI-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v1
; SI-GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
  ; Dividend: sign-extend %a, then clamp it to [-32, 64] with smax/smin.
  %ext.a = sext i16 %a to i32
  %max.a = call i32 @llvm.smax.i32(i32 %ext.a, i32 -32)
  %smed3.a = call i32 @llvm.smin.i32(i32 %max.a, i32 64)
  ; Divisor: sign-extend %b, then clamp it to [-64, 128] with smax/smin.
  %ext.b = sext i16 %b to i32
  %max.b = call i32 @llvm.smax.i32(i32 %ext.b, i32 -64)
  %smed3.b = call i32 @llvm.smin.i32(i32 %max.b, i32 128)
  ; Signed division of the two clamped values. The SelectionDAG checks above
  ; expect a float-reciprocal based sequence, while the GlobalISel checks
  ; expect an integer mul_hi based expansion.
  %div = sdiv i32 %smed3.a, %smed3.b
  ret i32 %div
}