; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - < %s | FileCheck --check-prefixes=GCN %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - < %s | FileCheck --check-prefixes=GCN %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - < %s | FileCheck --check-prefixes=GCN %s

; Test vector bitfield extract.
define i32 @v_srl_mask_i32(i32 %value) {
; GCN-LABEL: v_srl_mask_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_u32 v0, v0, 8, 5
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %1 = lshr i32 %value, 8
  %2 = and i32 %1, 31
  ret i32 %2
}

; Test scalar bitfield extract.
define amdgpu_ps i32 @s_srl_mask_i32(i32 inreg %value) {
; GCN-LABEL: s_srl_mask_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_bfe_u32 s0, s0, 0x50008
; GCN-NEXT:    ; return to shader part epilog
  %1 = lshr i32 %value, 8
  %2 = and i32 %1, 31
  ret i32 %2
}

; Don't generate G_UBFX if the offset + width is too big.
define amdgpu_ps i32 @s_srl_big_mask_i32(i32 inreg %value) {
; GCN-LABEL: s_srl_big_mask_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b32 s0, s0, 30
; GCN-NEXT:    ; return to shader part epilog
  %1 = lshr i32 %value, 30
  %2 = and i32 %1, 31
  ret i32 %2
}

; Test vector bitfield extract.
define i32 @v_mask_srl_i32(i32 %value) {
; GCN-LABEL: v_mask_srl_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_u32 v0, v0, 8, 5
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %1 = and i32 %value, 7936 ; 31 << 8
  %2 = lshr i32 %1, 8
  ret i32 %2
}

; Test scalar bitfield extract.
define amdgpu_ps i32 @s_mask_srl_i32(i32 inreg %value) {
; GCN-LABEL: s_mask_srl_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_bfe_u32 s0, s0, 0x50008
; GCN-NEXT:    ; return to shader part epilog
  %1 = and i32 %value, 7936 ; 31 << 8
  %2 = lshr i32 %1, 8
  ret i32 %2
}

; Test vector bitfield extract for 64-bits.
define i64 @v_srl_mask_i64(i64 %value) {
; GCN-LABEL: v_srl_mask_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshrrev_b64 v[0:1], 25, v[0:1]
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    v_bfe_u32 v0, v0, 0, 10
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %1 = lshr i64 %value, 25
  %2 = and i64 %1, 1023
  ret i64 %2
}

; Test scalar bitfield extract for 64-bits.
define amdgpu_ps i64 @s_srl_mask_i64(i64 inreg %value) {
; GCN-LABEL: s_srl_mask_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_bfe_u64 s[0:1], s[0:1], 0xa0019
; GCN-NEXT:    ; return to shader part epilog
  %1 = lshr i64 %value, 25
  %2 = and i64 %1, 1023
  ret i64 %2
}

; Don't generate G_UBFX if the offset + width is too big.
define amdgpu_ps i64 @s_srl_big_mask_i64(i64 inreg %value) {
; GCN-LABEL: s_srl_big_mask_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshr_b32 s0, s1, 28
; GCN-NEXT:    s_mov_b32 s1, 0
; GCN-NEXT:    ; return to shader part epilog
  %1 = lshr i64 %value, 60
  %2 = and i64 %1, 63
  ret i64 %2
}

; Test vector bitfield extract for 64-bits.
; TODO: No need for a 64-bit shift instruction when the extracted value is
; entirely contained within the upper or lower half.
define i64 @v_mask_srl_i64(i64 %value) {
; GCN-LABEL: v_mask_srl_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshrrev_b64 v[0:1], 25, v[0:1]
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    v_bfe_u32 v0, v0, 0, 10
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %1 = and i64 %value, 34326183936 ; 1023 << 25
  %2 = lshr i64 %1, 25
  ret i64 %2
}

; Test scalar bitfield extract for 64-bits.
define amdgpu_ps i64 @s_mask_srl_i64(i64 inreg %value) {
; GCN-LABEL: s_mask_srl_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_bfe_u64 s[0:1], s[0:1], 0xa0019
; GCN-NEXT:    ; return to shader part epilog
  %1 = and i64 %value, 34326183936 ; 1023 << 25
  %2 = lshr i64 %1, 25
  ret i64 %2
}