; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s

; Overview: each kernel below is compiled for gfx803 and gfx906 with the module
; flag at the bottom of the file set to 400, 500 and 600 (the placeholder is
; substituted by sed before llc sees the file). The 600 invocations reuse the
; prefixes of the 500 invocations, i.e. both are expected to produce the same
; code. The assertion blocks are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Casts a private (addrspace 5) and a local (addrspace 3) pointer to flat
; pointers and performs one volatile store through each of them.
; Expected gfx803 code fetches two extra dwords for the casts: from s[6:7] at
; offset 0x40 with version 400, and from s[8:9] at offset 0xc8 with versions
; 500/600. Expected gfx906 code reads src_private_base / src_shared_base
; instead of loading anything extra.
; NOTE(review): the extra dwords are presumably the private/shared aperture
; bases (queue pointer vs. implicit kernel arguments) - confirm against the
; AMDGPU backend documentation.
define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) {
; GFX8V4-LABEL: addrspacecast:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX8V4-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x40
; GFX8V4-NEXT: v_mov_b32_e32 v4, 1
; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V4-NEXT: s_cmp_lg_u32 s0, -1
; GFX8V4-NEXT: s_cselect_b32 s3, s3, 0
; GFX8V4-NEXT: s_cselect_b32 s0, s0, 0
; GFX8V4-NEXT: s_cmp_lg_u32 s1, -1
; GFX8V4-NEXT: v_mov_b32_e32 v0, s0
; GFX8V4-NEXT: v_mov_b32_e32 v1, s3
; GFX8V4-NEXT: s_cselect_b32 s0, s2, 0
; GFX8V4-NEXT: s_cselect_b32 s1, s1, 0
; GFX8V4-NEXT: v_mov_b32_e32 v2, s1
; GFX8V4-NEXT: v_mov_b32_e32 v3, s0
; GFX8V4-NEXT: flat_store_dword v[0:1], v4
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
; GFX8V4-NEXT: v_mov_b32_e32 v0, 2
; GFX8V4-NEXT: flat_store_dword v[2:3], v0
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
; GFX8V4-NEXT: s_endpgm
;
; GFX8V5-LABEL: addrspacecast:
; GFX8V5: ; %bb.0:
; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX8V5-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0xc8
; GFX8V5-NEXT: v_mov_b32_e32 v4, 1
; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT: s_cmp_lg_u32 s0, -1
; GFX8V5-NEXT: s_cselect_b32 s2, s2, 0
; GFX8V5-NEXT: s_cselect_b32 s0, s0, 0
; GFX8V5-NEXT: s_cmp_lg_u32 s1, -1
; GFX8V5-NEXT: v_mov_b32_e32 v0, s0
; GFX8V5-NEXT: v_mov_b32_e32 v1, s2
; GFX8V5-NEXT: s_cselect_b32 s0, s3, 0
; GFX8V5-NEXT: s_cselect_b32 s1, s1, 0
; GFX8V5-NEXT: v_mov_b32_e32 v2, s1
; GFX8V5-NEXT: v_mov_b32_e32 v3, s0
; GFX8V5-NEXT: flat_store_dword v[0:1], v4
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: v_mov_b32_e32 v0, 2
; GFX8V5-NEXT: flat_store_dword v[2:3], v0
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: s_endpgm
;
; GFX9V4-LABEL: addrspacecast:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V4-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX9V4-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX9V4-NEXT: v_mov_b32_e32 v4, 1
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT: s_cmp_lg_u32 s0, -1
; GFX9V4-NEXT: s_cselect_b32 s2, s3, 0
; GFX9V4-NEXT: s_cselect_b32 s0, s0, 0
; GFX9V4-NEXT: s_cmp_lg_u32 s1, -1
; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
; GFX9V4-NEXT: v_mov_b32_e32 v1, s2
; GFX9V4-NEXT: s_cselect_b32 s0, s5, 0
; GFX9V4-NEXT: s_cselect_b32 s1, s1, 0
; GFX9V4-NEXT: v_mov_b32_e32 v2, s1
; GFX9V4-NEXT: v_mov_b32_e32 v3, s0
; GFX9V4-NEXT: flat_store_dword v[0:1], v4
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: v_mov_b32_e32 v0, 2
; GFX9V4-NEXT: flat_store_dword v[2:3], v0
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: s_endpgm
;
; GFX9V5-LABEL: addrspacecast:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V5-NEXT: s_mov_b64 s[2:3], src_private_base
; GFX9V5-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX9V5-NEXT: v_mov_b32_e32 v4, 1
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT: s_cmp_lg_u32 s0, -1
; GFX9V5-NEXT: s_cselect_b32 s2, s3, 0
; GFX9V5-NEXT: s_cselect_b32 s0, s0, 0
; GFX9V5-NEXT: s_cmp_lg_u32 s1, -1
; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
; GFX9V5-NEXT: v_mov_b32_e32 v1, s2
; GFX9V5-NEXT: s_cselect_b32 s0, s5, 0
; GFX9V5-NEXT: s_cselect_b32 s1, s1, 0
; GFX9V5-NEXT: v_mov_b32_e32 v2, s1
; GFX9V5-NEXT: v_mov_b32_e32 v3, s0
; GFX9V5-NEXT: flat_store_dword v[0:1], v4
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: v_mov_b32_e32 v0, 2
; GFX9V5-NEXT: flat_store_dword v[2:3], v0
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: s_endpgm
  %flat.private = addrspacecast ptr addrspace(5) %ptr.private to ptr
  %flat.local = addrspacecast ptr addrspace(3) %ptr.local to ptr
  store volatile i32 1, ptr %flat.private
  store volatile i32 2, ptr %flat.local
  ret void
}

; Calls llvm.amdgcn.is.shared on a flat pointer, zero-extends the i1 result and
; volatile-stores it to an undef addrspace(1) pointer.
; Expected code compares the dword loaded from s[8:9] at offset 0x4 with:
;   gfx803, version 400     - the dword loaded from s[6:7] at offset 0x40
;   gfx803, version 500/600 - the dword loaded from s[8:9] at offset 0xcc
;   gfx906, all versions    - s1 after s[0:1] is set from src_shared_base
; NOTE(review): offset 0x4 is presumably the high half of %ptr - confirm.
define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) {
; GFX8V4-LABEL: llvm_amdgcn_is_shared:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_load_dword s0, s[6:7], 0x40
; GFX8V4-NEXT: s_load_dword s1, s[8:9], 0x4
; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V4-NEXT: s_cmp_eq_u32 s1, s0
; GFX8V4-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX8V4-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8V4-NEXT: flat_store_dword v[0:1], v0
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
; GFX8V4-NEXT: s_endpgm
;
; GFX8V5-LABEL: llvm_amdgcn_is_shared:
; GFX8V5: ; %bb.0:
; GFX8V5-NEXT: s_load_dword s0, s[8:9], 0xcc
; GFX8V5-NEXT: s_load_dword s1, s[8:9], 0x4
; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT: s_cmp_eq_u32 s1, s0
; GFX8V5-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX8V5-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8V5-NEXT: flat_store_dword v[0:1], v0
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: s_endpgm
;
; GFX9V4-LABEL: llvm_amdgcn_is_shared:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dword s2, s[8:9], 0x4
; GFX9V4-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT: s_cmp_eq_u32 s2, s1
; GFX9V4-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9V4-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V4-NEXT: global_store_dword v[0:1], v0, off
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: s_endpgm
;
; GFX9V5-LABEL: llvm_amdgcn_is_shared:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_load_dword s2, s[8:9], 0x4
; GFX9V5-NEXT: s_mov_b64 s[0:1], src_shared_base
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT: s_cmp_eq_u32 s2, s1
; GFX9V5-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9V5-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V5-NEXT: global_store_dword v[0:1], v0, off
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: s_endpgm
  %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
  %zext = zext i1 %is.shared to i32
  store volatile i32 %zext, ptr addrspace(1) undef
  ret void
}

; Same shape as the is.shared kernel, but for llvm.amdgcn.is.private.
; Expected code compares the dword loaded from s[8:9] at offset 0x4 with:
;   gfx803, version 400     - the dword loaded from s[6:7] at offset 0x44
;   gfx803, version 500/600 - the dword loaded from s[8:9] at offset 0xc8
;   gfx906, all versions    - s1 after s[0:1] is set from src_private_base
define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) {
; GFX8V4-LABEL: llvm_amdgcn_is_private:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_load_dword s0, s[6:7], 0x44
; GFX8V4-NEXT: s_load_dword s1, s[8:9], 0x4
; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V4-NEXT: s_cmp_eq_u32 s1, s0
; GFX8V4-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX8V4-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8V4-NEXT: flat_store_dword v[0:1], v0
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
; GFX8V4-NEXT: s_endpgm
;
; GFX8V5-LABEL: llvm_amdgcn_is_private:
; GFX8V5: ; %bb.0:
; GFX8V5-NEXT: s_load_dword s0, s[8:9], 0xc8
; GFX8V5-NEXT: s_load_dword s1, s[8:9], 0x4
; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT: s_cmp_eq_u32 s1, s0
; GFX8V5-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX8V5-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8V5-NEXT: flat_store_dword v[0:1], v0
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: s_endpgm
;
; GFX9V4-LABEL: llvm_amdgcn_is_private:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dword s2, s[8:9], 0x4
; GFX9V4-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT: s_cmp_eq_u32 s2, s1
; GFX9V4-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9V4-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V4-NEXT: global_store_dword v[0:1], v0, off
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: s_endpgm
;
; GFX9V5-LABEL: llvm_amdgcn_is_private:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_load_dword s2, s[8:9], 0x4
; GFX9V5-NEXT: s_mov_b64 s[0:1], src_private_base
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT: s_cmp_eq_u32 s2, s1
; GFX9V5-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9V5-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V5-NEXT: global_store_dword v[0:1], v0, off
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: s_endpgm
  %is.private = call i1 @llvm.amdgcn.is.private(ptr %ptr)
  %zext = zext i1 %is.private to i32
  store volatile i32 %zext, ptr addrspace(1) undef
  ret void
}

; Calls llvm.trap and ends in unreachable.
; Expected gfx803 code sets up s[0:1] before "s_trap 2": copied from s[6:7]
; with version 400, loaded from s[8:9] at offset 0xc8 with versions 500/600.
; Expected gfx906 code is a bare "s_trap 2".
; NOTE(review): s[0:1] presumably carries the queue pointer for the trap
; handler - confirm against the AMDGPU backend documentation.
define amdgpu_kernel void @llvm_trap() {
; GFX8V4-LABEL: llvm_trap:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_mov_b64 s[0:1], s[6:7]
; GFX8V4-NEXT: s_trap 2
;
; GFX8V5-LABEL: llvm_trap:
; GFX8V5: ; %bb.0:
; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0xc8
; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT: s_trap 2
;
; GFX9V4-LABEL: llvm_trap:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_trap 2
;
; GFX9V5-LABEL: llvm_trap:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_trap 2
  call void @llvm.trap()
  unreachable
}

; Calls llvm.debugtrap and ends in unreachable. All four configurations expect
; a bare "s_trap 3" with no extra register setup.
define amdgpu_kernel void @llvm_debugtrap() {
; GFX8V4-LABEL: llvm_debugtrap:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_trap 3
;
; GFX8V5-LABEL: llvm_debugtrap:
; GFX8V5: ; %bb.0:
; GFX8V5-NEXT: s_trap 3
;
; GFX9V4-LABEL: llvm_debugtrap:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_trap 3
;
; GFX9V5-LABEL: llvm_debugtrap:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_trap 3
  call void @llvm.debugtrap()
  unreachable
}

; Does a volatile byte load through each of the queue, implicit-argument and
; dispatch pointers (in that order) and then volatile-stores the dispatch id
; to %ptr.
; For each GPU the expected code is identical across code object versions. The
; three byte loads use s[6:7], s[8:9] plus 8, and s[4:5] as their addresses;
; the stored 64-bit value comes from s[10:11]; the store address is loaded
; from s[8:9] at offset 0x0.
define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) {
; GFX8V4-LABEL: llvm_amdgcn_queue_ptr:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: v_mov_b32_e32 v0, s6
; GFX8V4-NEXT: v_mov_b32_e32 v1, s7
; GFX8V4-NEXT: s_add_u32 s0, s8, 8
; GFX8V4-NEXT: flat_load_ubyte v0, v[0:1] glc
; GFX8V4-NEXT: s_addc_u32 s1, s9, 0
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
; GFX8V4-NEXT: v_mov_b32_e32 v0, s0
; GFX8V4-NEXT: v_mov_b32_e32 v1, s1
; GFX8V4-NEXT: flat_load_ubyte v0, v[0:1] glc
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
; GFX8V4-NEXT: v_mov_b32_e32 v0, s4
; GFX8V4-NEXT: v_mov_b32_e32 v1, s5
; GFX8V4-NEXT: flat_load_ubyte v0, v[0:1] glc
; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX8V4-NEXT: v_mov_b32_e32 v2, s10
; GFX8V4-NEXT: v_mov_b32_e32 v3, s11
; GFX8V4-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8V4-NEXT: v_mov_b32_e32 v0, s0
; GFX8V4-NEXT: v_mov_b32_e32 v1, s1
; GFX8V4-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
; GFX8V4-NEXT: s_endpgm
;
; GFX8V5-LABEL: llvm_amdgcn_queue_ptr:
; GFX8V5: ; %bb.0:
; GFX8V5-NEXT: v_mov_b32_e32 v0, s6
; GFX8V5-NEXT: v_mov_b32_e32 v1, s7
; GFX8V5-NEXT: s_add_u32 s0, s8, 8
; GFX8V5-NEXT: flat_load_ubyte v0, v[0:1] glc
; GFX8V5-NEXT: s_addc_u32 s1, s9, 0
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: v_mov_b32_e32 v0, s0
; GFX8V5-NEXT: v_mov_b32_e32 v1, s1
; GFX8V5-NEXT: flat_load_ubyte v0, v[0:1] glc
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: v_mov_b32_e32 v0, s4
; GFX8V5-NEXT: v_mov_b32_e32 v1, s5
; GFX8V5-NEXT: flat_load_ubyte v0, v[0:1] glc
; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX8V5-NEXT: v_mov_b32_e32 v2, s10
; GFX8V5-NEXT: v_mov_b32_e32 v3, s11
; GFX8V5-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8V5-NEXT: v_mov_b32_e32 v0, s0
; GFX8V5-NEXT: v_mov_b32_e32 v1, s1
; GFX8V5-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: s_endpgm
;
; GFX9V4-LABEL: llvm_amdgcn_queue_ptr:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: v_mov_b32_e32 v2, 0
; GFX9V4-NEXT: global_load_ubyte v0, v2, s[6:7] glc
; GFX9V4-NEXT: global_load_ubyte v0, v2, s[8:9] offset:8 glc
; GFX9V4-NEXT: global_load_ubyte v0, v2, s[4:5] glc
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: v_mov_b32_e32 v0, s10
; GFX9V4-NEXT: v_mov_b32_e32 v1, s11
; GFX9V4-NEXT: ; kill: killed $sgpr6_sgpr7
; GFX9V4-NEXT: ; kill: killed $sgpr4_sgpr5
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: s_endpgm
;
; GFX9V5-LABEL: llvm_amdgcn_queue_ptr:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: v_mov_b32_e32 v2, 0
; GFX9V5-NEXT: global_load_ubyte v0, v2, s[6:7] glc
; GFX9V5-NEXT: global_load_ubyte v0, v2, s[8:9] offset:8 glc
; GFX9V5-NEXT: global_load_ubyte v0, v2, s[4:5] glc
; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: v_mov_b32_e32 v0, s10
; GFX9V5-NEXT: v_mov_b32_e32 v1, s11
; GFX9V5-NEXT: ; kill: killed $sgpr6_sgpr7
; GFX9V5-NEXT: ; kill: killed $sgpr4_sgpr5
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: s_endpgm
  %queue.ptr = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %dispatch.ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
  %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
  %queue.load = load volatile i8, ptr addrspace(4) %queue.ptr
  %implicitarg.load = load volatile i8, ptr addrspace(4) %implicitarg.ptr
  %dispatch.load = load volatile i8, ptr addrspace(4) %dispatch.ptr
  store volatile i64 %dispatch.id, ptr addrspace(1) %ptr
  ret void
}

; Declarations of the intrinsics called by the kernels above.
declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr()
declare noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
declare i64 @llvm.amdgcn.dispatch.id()
declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
declare i1 @llvm.amdgcn.is.shared(ptr)
declare i1 @llvm.amdgcn.is.private(ptr)
declare void @llvm.trap()
declare void @llvm.debugtrap()

; The i32 operand of the flag below is a textual placeholder. It is not valid
; IR until sed has replaced it with 400, 500 or 600, so this file cannot be fed
; to llc directly.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION}