; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole=0 < %s | FileCheck %s --check-prefixes=VI
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck %s --check-prefixes=VI-SDWA
; RUN: llc -mtriple=amdgcn -mcpu=bonaire < %s | FileCheck %s --check-prefixes=CI

; Index an i32 array with bits [15:8] of (%x + workitem id), i.e. (idx >> 8) & 255.
; Expected output per the checks below: on fiji without the SDWA peephole a
; v_bfe_u32 (offset 8, width 8) followed by the scale-by-4 shift; on fiji with
; SDWA the extract is folded into the shift as a BYTE_1 source select; on
; bonaire the shift and the scaling are merged into lshr 6 + and 0x3fc.
define amdgpu_kernel void @bfe_combine8(ptr addrspace(1) nocapture %arg, i32 %x) {
; VI-LABEL: bfe_combine8:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_bfe_u32 v0, v0, 8, 8
; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    flat_load_dword v2, v[0:1]
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; VI-SDWA-LABEL: bfe_combine8:
; VI-SDWA:       ; %bb.0:
; VI-SDWA-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-SDWA-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-SDWA-NEXT:    v_mov_b32_e32 v1, 2
; VI-SDWA-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDWA-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-SDWA-NEXT:    v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; VI-SDWA-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDWA-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
; VI-SDWA-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDWA-NEXT:    flat_load_dword v2, v[0:1]
; VI-SDWA-NEXT:    v_mov_b32_e32 v0, s0
; VI-SDWA-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDWA-NEXT:    s_waitcnt vmcnt(0)
; VI-SDWA-NEXT:    flat_store_dword v[0:1], v2
; VI-SDWA-NEXT:    s_endpgm
;
; CI-LABEL: bfe_combine8:
; CI:       ; %bb.0:
; CI-NEXT:    s_load_dword s2, s[4:5], 0xb
; CI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; CI-NEXT:    s_mov_b32 s3, 0xf000
; CI-NEXT:    s_mov_b32 s6, 0
; CI-NEXT:    s_mov_b32 s7, s3
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_add_i32_e32 v0, vcc, s2, v0
; CI-NEXT:    v_lshrrev_b32_e32 v0, 6, v0
; CI-NEXT:    s_mov_b64 s[4:5], s[0:1]
; CI-NEXT:    v_and_b32_e32 v0, 0x3fc, v0
; CI-NEXT:    v_mov_b32_e32 v1, 0
; CI-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; CI-NEXT:    s_mov_b32 s2, -1
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; CI-NEXT:    s_endpgm
  %id = tail call i32 @llvm.amdgcn.workitem.id.x() #2
  %idx = add i32 %x, %id
  %srl = lshr i32 %idx, 8
  %and = and i32 %srl, 255
  %ptr = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %and
  %val = load i32, ptr addrspace(1) %ptr, align 4
  store i32 %val, ptr addrspace(1) %arg, align 4
  ret void
}

; Index an i32 array with (idx >> 1) & 0x7fff8000 (2147450880), which is the
; upper 16 bits of (%x + workitem id) repositioned at bit 15.
; Expected output per the checks below: on fiji without the SDWA peephole a
; v_bfe_u32 (offset 16, width 16) followed by a left shift by 15; on fiji with
; SDWA the extract becomes a WORD_1 source select on the shift; on bonaire the
; original shift-and-mask pair is kept.
define amdgpu_kernel void @bfe_combine16(ptr addrspace(1) nocapture %arg, i32 %x) {
; VI-LABEL: bfe_combine16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v1, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_bfe_u32 v0, v0, 16, 16
; VI-NEXT:    v_lshlrev_b32_e32 v0, 15, v0
; VI-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
; VI-NEXT:    v_mov_b32_e32 v2, s1
; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
; VI-NEXT:    v_addc_u32_e32 v1, vcc, v2, v1, vcc
; VI-NEXT:    flat_load_dword v2, v[0:1]
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; VI-SDWA-LABEL: bfe_combine16:
; VI-SDWA:       ; %bb.0:
; VI-SDWA-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-SDWA-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-SDWA-NEXT:    v_mov_b32_e32 v1, 15
; VI-SDWA-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDWA-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-SDWA-NEXT:    v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-SDWA-NEXT:    v_mov_b32_e32 v1, 0
; VI-SDWA-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
; VI-SDWA-NEXT:    v_mov_b32_e32 v2, s1
; VI-SDWA-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
; VI-SDWA-NEXT:    v_addc_u32_e32 v1, vcc, v2, v1, vcc
; VI-SDWA-NEXT:    flat_load_dword v2, v[0:1]
; VI-SDWA-NEXT:    v_mov_b32_e32 v0, s0
; VI-SDWA-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDWA-NEXT:    s_waitcnt vmcnt(0)
; VI-SDWA-NEXT:    flat_store_dword v[0:1], v2
; VI-SDWA-NEXT:    s_endpgm
;
; CI-LABEL: bfe_combine16:
; CI:       ; %bb.0:
; CI-NEXT:    s_load_dword s2, s[4:5], 0xb
; CI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; CI-NEXT:    v_mov_b32_e32 v1, 0
; CI-NEXT:    s_mov_b32 s3, 0xf000
; CI-NEXT:    s_mov_b32 s6, 0
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_add_i32_e32 v0, vcc, s2, v0
; CI-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
; CI-NEXT:    v_and_b32_e32 v0, 0x7fff8000, v0
; CI-NEXT:    v_lshl_b64 v[0:1], v[0:1], 2
; CI-NEXT:    s_mov_b32 s7, s3
; CI-NEXT:    s_mov_b64 s[4:5], s[0:1]
; CI-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; CI-NEXT:    s_mov_b32 s2, -1
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; CI-NEXT:    s_endpgm
  %id = tail call i32 @llvm.amdgcn.workitem.id.x() #2
  %idx = add i32 %x, %id
  %srl = lshr i32 %idx, 1
  %and = and i32 %srl, 2147450880
  %ptr = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %and
  %val = load i32, ptr addrspace(1) %ptr, align 4
  store i32 %val, ptr addrspace(1) %arg, align 4
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1