; Exercises llvm.amdgcn.kernarg.segment.ptr and llvm.amdgcn.implicitarg.ptr
; with GlobalISel on three target configurations. Each invocation below
; selects its own combination of FileCheck prefixes.

; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V4,HSA,ALL %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V4,OS-MESA3D,ALL %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-unknown -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=OS-UNKNOWN,ALL %s

; Kernel with one pointer argument: loads the i32 at index 10 relative to the
; kernarg segment pointer and stores it to %out. The checks pin which SGPR
; pair carries the kernarg pointer for each prefix set (s[8:9] vs. s[4:5]).

; ALL-LABEL: {{^}}test:
; OS-MESA3D: enable_sgpr_kernarg_segment_ptr = 1
; CO-V4: s_load_dword s{{[0-9]+}}, s[8:9], 0xa

; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[4:5], 0xa

; HSA: .amdhsa_kernarg_size 8
; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 1
define amdgpu_kernel void @test(ptr addrspace(1) %out) #1 {
  %kernarg.segment.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
  %gep = getelementptr i32, ptr addrspace(4) %kernarg.segment.ptr, i64 10
  %value = load i32, ptr addrspace(4) %gep
  store i32 %value, ptr addrspace(1) %out
  ret void
}

; Same access pattern as @test, but the base address comes from
; llvm.amdgcn.implicitarg.ptr instead of the kernarg segment pointer.

; ALL-LABEL: {{^}}test_implicit:
; OS-MESA3D: kernarg_segment_byte_size = 24
; OS-MESA3D: kernarg_segment_alignment = 4

; Expected load offset, counted in 4-byte units:
; 10 (GEP index) + 9 (36 prepended implicit bytes) + 2 (out pointer) = 21 = 0x15

; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x15

; HSA: .amdhsa_kernarg_size 8
define amdgpu_kernel void @test_implicit(ptr addrspace(1) %out) #1 {
  %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep = getelementptr i32, ptr addrspace(4) %implicitarg.ptr, i64 10
  %value = load i32, ptr addrspace(4) %gep
  store i32 %value, ptr addrspace(1) %out
  ret void
}

; Adds a trailing <2 x i8> argument after the pointer and loads the first i32
; at the implicitarg pointer. Each prefix set expects a different load offset.
; NOTE(review): presumably this covers the padding inserted between the
; explicit and implicit arguments - confirm against the backend.

; ALL-LABEL: {{^}}test_implicit_alignment:
; OS-MESA3D: kernarg_segment_byte_size = 28
; OS-MESA3D: kernarg_segment_alignment = 4

; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3
; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]]
; ALL: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]

; HSA: .amdhsa_kernarg_size 12
define amdgpu_kernel void @test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) #1 {
  %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %val = load i32, ptr addrspace(4) %implicitarg.ptr
  store i32 %val, ptr addrspace(1) %out
  ret void
}

; Identical body to @test_implicit_alignment but uses attribute group #2,
; which requests 48 implicit argument bytes; the expected HSA kernarg size
; grows to 64 while the load offsets stay the same.
; The label pattern ends in ':' so that it cannot also match the longer
; opencl_test_implicit_alignment_no_explicit_kernargs* symbols.

; ALL-LABEL: {{^}}opencl_test_implicit_alignment:
; OS-MESA3D: kernarg_segment_byte_size = 28
; OS-MESA3D: kernarg_segment_alignment = 4

; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3
; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]]
; ALL: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]

; HSA: .amdhsa_kernarg_size 64
define amdgpu_kernel void @opencl_test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) #2 {
  %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %val = load i32, ptr addrspace(4) %implicitarg.ptr
  store i32 %val, ptr addrspace(1) %out
  ret void
}

; Kernel with no arguments (attribute group #4). The HSA checks expect no
; kernarg segment pointer SGPR to be enabled, and the load address to be
; materialized as the constant 40 (10 x 4 bytes from the GEP).

; ALL-LABEL: {{^}}test_no_kernargs:
; OS-MESA3D: enable_sgpr_kernarg_segment_ptr = 0
; OS-MESA3D: kernarg_segment_byte_size = 0
; OS-MESA3D: kernarg_segment_alignment = 4

; HSA: s_mov_b64 [[OFFSET_NULL:s\[[0-9]+:[0-9]+\]]], 40{{$}}
; HSA: s_load_dword s{{[0-9]+}}, [[OFFSET_NULL]]

; HSA: .amdhsa_kernarg_size 0
; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 0
define amdgpu_kernel void @test_no_kernargs() #4 {
  %kernarg.segment.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
  %gep = getelementptr i32, ptr addrspace(4) %kernarg.segment.ptr, i64 10
  %value = load i32, ptr addrspace(4) %gep
  store volatile i32 %value, ptr addrspace(1) undef
  ret void
}
; Kernel with no explicit arguments that requests 48 implicit argument bytes
; (attribute group #2); the expected HSA kernarg size equals those 48 bytes.

; ALL-LABEL: {{^}}opencl_test_implicit_alignment_no_explicit_kernargs:
; OS-MESA3D: kernarg_segment_byte_size = 16
; OS-MESA3D: kernarg_segment_alignment = 4
; HSA: .amdhsa_kernarg_size 48
define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs() #2 {
  %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %val = load volatile i32, ptr addrspace(4) %implicitarg.ptr
  store volatile i32 %val, ptr addrspace(1) null
  ret void
}

; Same as above, but attribute group #3 requests 38 implicit bytes; the
; expected HSA kernarg size is 40, i.e. the request rounded up.

; ALL-LABEL: {{^}}opencl_test_implicit_alignment_no_explicit_kernargs_round_up:
; OS-MESA3D: kernarg_segment_byte_size = 16
; OS-MESA3D: kernarg_segment_alignment = 4
; HSA: .amdhsa_kernarg_size 40
define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs_round_up() #3 {
  %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %val = load volatile i32, ptr addrspace(4) %implicitarg.ptr
  store volatile i32 %val, ptr addrspace(1) null
  ret void
}

; Non-kernel function returning the kernarg segment pointer. Every prefix set
; expects both return registers (v0, v1) to be written with 0.

; ALL-LABEL: {{^}}func_kernarg_segment_ptr:
; ALL: v_mov_b32_e32 v0, 0{{$}}
; ALL: v_mov_b32_e32 v1, 0{{$}}
define ptr addrspace(4) @func_kernarg_segment_ptr() {
  %ptr = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
  ret ptr addrspace(4) %ptr
}

; Intrinsics under test; both return a pointer in address space 4.
declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0

; #0 is used by the intrinsic declarations. #1-#4 are used by the kernels and
; differ only in the number of implicit argument bytes requested; #4
; additionally carries "amdgpu-no-implicitarg-ptr".
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind "amdgpu-implicitarg-num-bytes"="0" }
attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" }
attributes #3 = { nounwind "amdgpu-implicitarg-num-bytes"="38" }
attributes #4 = { nounwind "amdgpu-implicitarg-num-bytes"="0" "amdgpu-no-implicitarg-ptr" }

; Module flag setting amdhsa_code_object_version to 400.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}