1; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor -o %t.v4.ll 2; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor -o %t.v6.ll 3; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %t.v4.ll | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s 4; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %t.v4.ll | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s 5; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %t.v4.ll | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s 6; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs < %t.v4.ll | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s 7; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs < %t.v4.ll | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s 8; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %t.v4.ll | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s 9; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %t.v4.ll | FileCheck -check-prefixes=ALL,PACKED-TID %s 10; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %t.v4.ll | FileCheck -check-prefixes=ALL,PACKED-TID %s 11; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -verify-machineinstrs -amdgpu-enable-vopd=0 < %t.v6.ll | FileCheck -check-prefixes=ALL,PACKED-TID %s 12 13declare i32 @llvm.amdgcn.workitem.id.x() #0 14declare i32 @llvm.amdgcn.workitem.id.y() #0 15declare i32 @llvm.amdgcn.workitem.id.z() #0 16 17; MESA: .section .AMDGPU.config 18; MESA: .long 47180 19; MESA-NEXT: .long 132{{$}} 20 21; ALL-LABEL: {{^}}test_workitem_id_x: 22; MESA3D: enable_vgpr_workitem_id = 0 23 24; ALL-NOT: v0 25; ALL: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}v0 26 27; PACKED-TID: .amdhsa_system_vgpr_workitem_id 0 28define amdgpu_kernel void @test_workitem_id_x(ptr addrspace(1) %out) #1 { 29 %id = call i32 @llvm.amdgcn.workitem.id.x() 30 store i32 %id, ptr addrspace(1) %out 31 ret void 32} 33 34; MESA: .section .AMDGPU.config 35; MESA: .long 47180 36; MESA-NEXT: .long 2180{{$}} 37 38; ALL-LABEL: {{^}}test_workitem_id_y: 39; MESA3D: enable_vgpr_workitem_id = 1 40; MESA3D-NOT: v1 41; MESA3D: {{buffer|flat}}_store_dword {{.*}}v1 42 43; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 10, 10 44; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]] 45; PACKED-TID: .amdhsa_system_vgpr_workitem_id 1 46define amdgpu_kernel void @test_workitem_id_y(ptr addrspace(1) %out) #1 { 47 %id = call i32 @llvm.amdgcn.workitem.id.y() 48 store i32 %id, ptr addrspace(1) %out 49 ret void 50} 51 52; MESA: .section .AMDGPU.config 53; MESA: .long 47180 54; MESA-NEXT: .long 4228{{$}} 55 56; ALL-LABEL: {{^}}test_workitem_id_z: 57; MESA3D: enable_vgpr_workitem_id = 2 58; MESA3D-NOT: v2 59; MESA3D: {{buffer|flat}}_store_dword {{.*}}v2 60 61; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 20, 10 62; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]] 63; PACKED-TID: .amdhsa_system_vgpr_workitem_id 2 64define amdgpu_kernel void @test_workitem_id_z(ptr addrspace(1) %out) #1 { 65 %id = call i32 @llvm.amdgcn.workitem.id.z() 66 store i32 %id, ptr addrspace(1) %out 67 ret void 68} 69 70; ALL-LABEL: {{^}}test_workitem_id_x_usex2: 71; ALL-NOT: v0 72; ALL: {{flat|global}}_store_{{dword|b32}} v{{.*}}, v0 73; ALL-NOT: v0 74; ALL: {{flat|global}}_store_{{dword|b32}} v{{.*}}, v0 75define amdgpu_kernel void @test_workitem_id_x_usex2(ptr addrspace(1) %out) #1 { 76 %id0 = call i32 @llvm.amdgcn.workitem.id.x() 77 store volatile i32 %id0, ptr addrspace(1) %out 78 79 %id1 = call i32 @llvm.amdgcn.workitem.id.x() 80 store volatile i32 %id1, ptr addrspace(1) %out 81 ret void 82} 83 84; ALL-LABEL: {{^}}test_workitem_id_x_use_outside_entry: 85; ALL-NOT: v0 86; ALL: {{flat|global}}_store_{{dword|b32}} 87; ALL-NOT: v0 88; ALL: {{flat|global}}_store_{{dword|b32}} v{{.*}}, v0 89define amdgpu_kernel void @test_workitem_id_x_use_outside_entry(ptr addrspace(1) %out, i32 %arg) #1 { 90bb0: 91 store volatile i32 0, ptr addrspace(1) %out 92 %cond = icmp eq i32 %arg, 0 93 br i1 %cond, label %bb1, label %bb2 94 95bb1: 96 %id = call i32 @llvm.amdgcn.workitem.id.x() 97 store volatile i32 %id, ptr addrspace(1) %out 98 br label %bb2 99 100bb2: 101 ret void 102} 103 104; ALL-LABEL: {{^}}test_workitem_id_x_func: 105; ALL: s_waitcnt 106; HSA-NEXT: v_and_b32_e32 v2, 0x3ff, v31 107; MESA-NEXT: v_and_b32_e32 v2, 0x3ff, v31 108define void @test_workitem_id_x_func(ptr addrspace(1) %out) #1 { 109 %id = call i32 @llvm.amdgcn.workitem.id.x() 110 store i32 %id, ptr addrspace(1) %out 111 ret void 112} 113 114; ALL-LABEL: {{^}}test_workitem_id_y_func: 115; HSA: v_bfe_u32 v2, v31, 10, 10 116; MESA: v_bfe_u32 v2, v31, 10, 10 117define void @test_workitem_id_y_func(ptr addrspace(1) %out) #1 { 118 %id = call i32 @llvm.amdgcn.workitem.id.y() 119 store i32 %id, ptr addrspace(1) %out 120 ret void 121} 122 123; ALL-LABEL: {{^}}test_workitem_id_z_func: 124; HSA: v_bfe_u32 v2, v31, 20, 10 125; MESA: v_bfe_u32 v2, v31, 20, 10 126define void @test_workitem_id_z_func(ptr addrspace(1) %out) #1 { 127 %id = call i32 @llvm.amdgcn.workitem.id.z() 128 store i32 %id, ptr addrspace(1) %out 129 ret void 130} 131 132; FIXME: Should be able to avoid enabling in kernel inputs 133; FIXME: Packed tid should avoid the and 134; ALL-LABEL: {{^}}test_reqd_workgroup_size_x_only: 135; MESA3D: enable_vgpr_workitem_id = 0 136 137; ALL-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} 138; UNPACKED-DAG: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0 139 140; PACKED: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x3ff, v0 141; PACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]] 142 143; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] 144; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] 145define amdgpu_kernel void @test_reqd_workgroup_size_x_only(ptr %out) !reqd_work_group_size !0 { 146 %id.x = call i32 @llvm.amdgcn.workitem.id.x() 147 %id.y = call i32 @llvm.amdgcn.workitem.id.y() 148 %id.z = call i32 @llvm.amdgcn.workitem.id.z() 149 store volatile i32 %id.x, ptr %out 150 store volatile i32 %id.y, ptr %out 151 store volatile i32 %id.z, ptr %out 152 ret void 153} 154 155; ALL-LABEL: {{^}}test_reqd_workgroup_size_y_only: 156; MESA3D: enable_vgpr_workitem_id = 1 157 158; ALL: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} 159; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] 160 161; UNPACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1 162 163; PACKED: v_bfe_u32 [[MASKED:v[0-9]+]], v0, 10, 10 164; PACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]] 165 166; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] 167define amdgpu_kernel void @test_reqd_workgroup_size_y_only(ptr %out) !reqd_work_group_size !1 { 168 %id.x = call i32 @llvm.amdgcn.workitem.id.x() 169 %id.y = call i32 @llvm.amdgcn.workitem.id.y() 170 %id.z = call i32 @llvm.amdgcn.workitem.id.z() 171 store volatile i32 %id.x, ptr %out 172 store volatile i32 %id.y, ptr %out 173 store volatile i32 %id.z, ptr %out 174 ret void 175} 176 177; ALL-LABEL: {{^}}test_reqd_workgroup_size_z_only: 178; MESA3D: enable_vgpr_workitem_id = 2 179 180; ALL: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} 181; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] 182; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] 183 184; UNPACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v2 185 186; PACKED: v_bfe_u32 [[MASKED:v[0-9]+]], v0, 10, 20 187; PACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]] 188define amdgpu_kernel void @test_reqd_workgroup_size_z_only(ptr %out) !reqd_work_group_size !2 { 189 %id.x = call i32 @llvm.amdgcn.workitem.id.x() 190 %id.y = call i32 @llvm.amdgcn.workitem.id.y() 191 %id.z = call i32 @llvm.amdgcn.workitem.id.z() 192 store volatile i32 %id.x, ptr %out 193 store volatile i32 %id.y, ptr %out 194 store volatile i32 %id.z, ptr %out 195 ret void 196} 197 198attributes #0 = { nounwind readnone } 199attributes #1 = { nounwind } 200 201!0 = !{i32 64, i32 1, i32 1} 202!1 = !{i32 1, i32 64, i32 1} 203!2 = !{i32 1, i32 1, i32 64} 204 205!llvm.module.flags = !{!99} 206!99 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION} 207