; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,OS-MESA3D,MESA,ALL %s
; RUN: llc -mtriple=amdgcn-mesa-unknown -verify-machineinstrs < %s | FileCheck -check-prefixes=OS-UNKNOWN,MESA,ALL %s

; Checks how llvm.amdgcn.kernarg.segment.ptr and llvm.amdgcn.implicitarg.ptr
; are lowered for three triples (amdhsa, mesa3d, mesa-unknown): which SGPR pair
; and immediate offset the resulting s_load_dword uses, and which
; kernarg_segment_byte_size / kernarg_segment_alignment values are emitted.
;
; Check prefixes are case-sensitive; a directive whose prefix does not exactly
; match one listed in -check-prefixes is silently ignored by FileCheck.

; Load from kernarg segment pointer + 10 x i32 in a kernel with one pointer
; argument.
; ALL-LABEL: {{^}}test:
; CO-V2: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 8
; HSA: kernarg_segment_alignment = 4

; CO-V2: s_load_dword s{{[0-9]+}}, s[4:5], 0xa

; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0xa
define amdgpu_kernel void @test(ptr addrspace(1) %out) #1 {
  %kernarg.segment.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
  %gep = getelementptr i32, ptr addrspace(4) %kernarg.segment.ptr, i64 10
  %value = load i32, ptr addrspace(4) %gep
  store i32 %value, ptr addrspace(1) %out
  ret void
}

; Same access pattern as @test, but relative to the implicit argument pointer.
; ALL-LABEL: {{^}}test_implicit:
; HSA: kernarg_segment_byte_size = 64
; OS-MESA3D: kernarg_segment_byte_size = 24
; CO-V2: kernarg_segment_alignment = 4

; Expected offset: 10 (gep index) + 9 (36 prepended implicit bytes) + 2 (out pointer) = 21 = 0x15
; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0x15
define amdgpu_kernel void @test_implicit(ptr addrspace(1) %out) #1 {
  %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep = getelementptr i32, ptr addrspace(4) %implicitarg.ptr, i64 10
  %value = load i32, ptr addrspace(4) %gep
  store i32 %value, ptr addrspace(1) %out
  ret void
}

; The explicit arguments (a pointer followed by <2 x i8>) end at an odd size,
; so the start of the implicit arguments has to be padded.
; ALL-LABEL: {{^}}test_implicit_alignment:
; HSA: kernarg_segment_byte_size = 72
; OS-MESA3D: kernarg_segment_byte_size = 28
; CO-V2: kernarg_segment_alignment = 4


; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3
; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]]
; MESA: buffer_store_dword [[V_VAL]]
; HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]
define amdgpu_kernel void @test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) #1 {
  %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %val = load i32, ptr addrspace(4) %implicitarg.ptr
  store i32 %val, ptr addrspace(1) %out
  ret void
}

; Same as @test_implicit_alignment, but with
; "amdgpu-implicitarg-num-bytes"="48" (attribute group #2).
; ALL-LABEL: {{^}}opencl_test_implicit_alignment:
; HSA: kernarg_segment_byte_size = 64
; OS-MESA3D: kernarg_segment_byte_size = 28
; CO-V2: kernarg_segment_alignment = 4


; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3
; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]]
; MESA: buffer_store_dword [[V_VAL]]
; HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]
define amdgpu_kernel void @opencl_test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) #2 {
  %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %val = load i32, ptr addrspace(4) %implicitarg.ptr
  store i32 %val, ptr addrspace(1) %out
  ret void
}

; Kernel with no arguments that still reads through the kernarg segment
; pointer; the amdhsa run expects the load to use a zero base address.
; ALL-LABEL: {{^}}test_no_kernargs:
; CO-V2: enable_sgpr_kernarg_segment_ptr = 0
; CO-V2: kernarg_segment_byte_size = 0

; CO-V2: kernarg_segment_alignment = 4

; HSA: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0{{$}}
; HSA: s_load_dword s{{[0-9]+}}, [[NULL]], 0xa{{$}}
define amdgpu_kernel void @test_no_kernargs() #1 {
  %kernarg.segment.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
  %gep = getelementptr i32, ptr addrspace(4) %kernarg.segment.ptr, i64 10
  %value = load i32, ptr addrspace(4) %gep
  store volatile i32 %value, ptr addrspace(1) undef
  ret void
}

; No explicit arguments; implicit arguments requested with
; "amdgpu-implicitarg-num-bytes"="48" (attribute group #2).
; ALL-LABEL: {{^}}opencl_test_implicit_alignment_no_explicit_kernargs:
; HSA: kernarg_segment_byte_size = 48
; OS-MESA3D: kernarg_segment_byte_size = 16
; CO-V2: kernarg_segment_alignment = 4
define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs() #2 {
  %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %val = load volatile i32, ptr addrspace(4) %implicitarg.ptr
  store volatile i32 %val, ptr addrspace(1) null
  ret void
}

; No explicit arguments; "amdgpu-implicitarg-num-bytes"="38" (attribute group
; #3) is not a multiple of 4, and the amdhsa run expects a segment size of 40.
; ALL-LABEL: {{^}}opencl_test_implicit_alignment_no_explicit_kernargs_round_up:
; HSA: kernarg_segment_byte_size = 40
; OS-MESA3D: kernarg_segment_byte_size = 16
; CO-V2: kernarg_segment_alignment = 4
define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs_round_up() #3 {
  %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %val = load volatile i32, ptr addrspace(4) %implicitarg.ptr
  store volatile i32 %val, ptr addrspace(1) null
  ret void
}

declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" }
attributes #3 = { nounwind "amdgpu-implicitarg-num-bytes"="38" }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 200}