; Codegen test for VGPR spilling on two AMDGPU subtargets. Each function below
; is compiled once for gfx908 and once for gfx900, and the checks compare how
; the two outputs handle register pressure: accumulator-register moves
; (v_accvgpr_write_b32 / v_accvgpr_read_b32) versus scratch buffer traffic
; (SCRATCH_RSRC setup, buffer_store_dword / buffer_load_dword).
;
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX908 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900 %s

; Kernel with attribute group #2 ("amdgpu-num-vgpr"="11"). Ten volatile i32
; loads are all fed to one empty inline asm as "v"-constrained operands and
; then stored back. The checks expect no scratch resource setup and no buffer_
; instructions on either subtarget, a zero scratch size on both, and on gfx908
; a v_accvgpr_write_b32 / v_accvgpr_read_b32 pair through the same a-register.
; GCN-LABEL: {{^}}max_11_vgprs:
; GFX900-NOT: SCRATCH_RSRC
; GFX908-NOT: SCRATCH_RSRC
; GFX908-DAG: v_accvgpr_write_b32 [[A_REG:a[0-9]+]], v{{[0-9]}}
; GFX900-NOT: buffer_
; GFX908-NOT: buffer_
; GFX908-DAG: v_mov_b32_e32 v{{[0-9]}}, [[V_REG:v[0-9]+]]
; GFX908-DAG: v_accvgpr_read_b32 [[V_REG]], [[A_REG]]

; GFX900: NumVgprs: 11
; GFX908: NumVgprs: 10
; GFX900: ScratchSize: 0
; GFX908: ScratchSize: 0
; GCN: VGPRBlocks: 2
; GFX900: NumVGPRsForWavesPerEU: 11
; GFX908: NumVGPRsForWavesPerEU: 10
define amdgpu_kernel void @max_11_vgprs(ptr addrspace(1) %p) #2 {
  ; The index is itself a volatile load, so the address chain below cannot be
  ; folded to constants.
  %tid = load volatile i32, ptr addrspace(1) undef
  %p1 = getelementptr inbounds i32, ptr addrspace(1) %p, i32 %tid
  %p2 = getelementptr inbounds i32, ptr addrspace(1) %p1, i32 4
  %p3 = getelementptr inbounds i32, ptr addrspace(1) %p2, i32 8
  %p4 = getelementptr inbounds i32, ptr addrspace(1) %p3, i32 12
  %p5 = getelementptr inbounds i32, ptr addrspace(1) %p4, i32 16
  %p6 = getelementptr inbounds i32, ptr addrspace(1) %p5, i32 20
  %p7 = getelementptr inbounds i32, ptr addrspace(1) %p6, i32 24
  %p8 = getelementptr inbounds i32, ptr addrspace(1) %p7, i32 28
  %p9 = getelementptr inbounds i32, ptr addrspace(1) %p8, i32 32
  %p10 = getelementptr inbounds i32, ptr addrspace(1) %p9, i32 36
  %v1 = load volatile i32, ptr addrspace(1) %p1
  %v2 = load volatile i32, ptr addrspace(1) %p2
  %v3 = load volatile i32, ptr addrspace(1) %p3
  %v4 = load volatile i32, ptr addrspace(1) %p4
  %v5 = load volatile i32, ptr addrspace(1) %p5
  %v6 = load volatile i32, ptr addrspace(1) %p6
  %v7 = load volatile i32, ptr addrspace(1) %p7
  %v8 = load volatile i32, ptr addrspace(1) %p8
  %v9 = load volatile i32, ptr addrspace(1) %p9
  %v10 = load volatile i32, ptr addrspace(1) %p10
  ; Empty asm body: its only effect is to use all ten loaded values at once.
  call void asm sideeffect "", "v,v,v,v,v,v,v,v,v,v"(i32 %v1, i32 %v2, i32 %v3, i32 %v4, i32 %v5, i32 %v6, i32 %v7, i32 %v8, i32 %v9, i32 %v10)
  store volatile i32 %v1, ptr addrspace(1) undef
  store volatile i32 %v2, ptr addrspace(1) undef
  store volatile i32 %v3, ptr addrspace(1) undef
  store volatile i32 %v4, ptr addrspace(1) undef
  store volatile i32 %v5, ptr addrspace(1) undef
  store volatile i32 %v6, ptr addrspace(1) undef
  store volatile i32 %v7, ptr addrspace(1) undef
  store volatile i32 %v8, ptr addrspace(1) undef
  store volatile i32 %v9, ptr addrspace(1) undef
  store volatile i32 %v10, ptr addrspace(1) undef
  ret void
}

; Kernel with attribute group #0 ("amdgpu-num-vgpr"="10") that loads and
; stores a single <32 x float>. Here both subtargets are expected to set up
; the scratch resource and emit a buffer_store_dword. On gfx908 the checks
; additionally expect writes to a0 and a9, no mention of a10, and a smaller
; scratch size (68) than on gfx900 (100).
; GCN-LABEL: {{^}}max_10_vgprs_spill_v32:
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
; GCN: buffer_store_dword v{{[0-9]}},
; GFX908-DAG: v_accvgpr_write_b32 a0, v{{[0-9]}}
; GFX908-DAG: v_accvgpr_write_b32 a9, v{{[0-9]}}
; GCN-NOT: a10

; GFX908: NumVgprs: 10
; GFX900: ScratchSize: 100
; GFX908: ScratchSize: 68
; GFX908: VGPRBlocks: 2
; GFX908: NumVGPRsForWavesPerEU: 10
define amdgpu_kernel void @max_10_vgprs_spill_v32(ptr addrspace(1) %p) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %p, i32 %tid
  %v = load volatile <32 x float>, ptr addrspace(1) %gep
  store volatile <32 x float> %v, ptr addrspace(1) undef
  ret void
}

; Kernel with attribute group #1 that performs nine volatile <32 x float>
; loads followed by nine volatile stores of the same values, all in one basic
; block. The checks expect gfx900 to go through scratch (resource setup,
; buffer store and load, scratch size 132) while gfx908 is expected to use
; accumulator-register moves only, with no buffer_ instructions and a zero
; scratch size.
; GCN-LABEL: {{^}}max_256_vgprs_spill_9x32:
; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
; GFX908-NOT: SCRATCH_RSRC
; GFX908-DAG: v_accvgpr_write_b32 a0, v
; GFX900: buffer_store_dword v
; GFX900: buffer_load_dword v
; GFX908-NOT: buffer_
; GFX908-DAG: v_accvgpr_read_b32

; GFX900: NumVgprs: 256
; GFX900: ScratchSize: 132
; GFX908: NumVgprs: 252
; GFX908: ScratchSize: 0
; GFX900: VGPRBlocks: 63
; GFX908: VGPRBlocks: 62
; GFX900: NumVGPRsForWavesPerEU: 256
; GFX908: NumVGPRsForWavesPerEU: 252
define amdgpu_kernel void @max_256_vgprs_spill_9x32(ptr addrspace(1) %p) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  ; Each pointer is derived from the previous one using the same %tid index.
  %p1 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p, i32 %tid
  %p2 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p1, i32 %tid
  %p3 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p2, i32 %tid
  %p4 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p3, i32 %tid
  %p5 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p4, i32 %tid
  %p6 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p5, i32 %tid
  %p7 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p6, i32 %tid
  %p8 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p7, i32 %tid
  %p9 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p8, i32 %tid
  ; All nine vectors are loaded before any of them is stored.
  %v1 = load volatile <32 x float>, ptr addrspace(1) %p1
  %v2 = load volatile <32 x float>, ptr addrspace(1) %p2
  %v3 = load volatile <32 x float>, ptr addrspace(1) %p3
  %v4 = load volatile <32 x float>, ptr addrspace(1) %p4
  %v5 = load volatile <32 x float>, ptr addrspace(1) %p5
  %v6 = load volatile <32 x float>, ptr addrspace(1) %p6
  %v7 = load volatile <32 x float>, ptr addrspace(1) %p7
  %v8 = load volatile <32 x float>, ptr addrspace(1) %p8
  %v9 = load volatile <32 x float>, ptr addrspace(1) %p9
  store volatile <32 x float> %v1, ptr addrspace(1) undef
  store volatile <32 x float> %v2, ptr addrspace(1) undef
  store volatile <32 x float> %v3, ptr addrspace(1) undef
  store volatile <32 x float> %v4, ptr addrspace(1) undef
  store volatile <32 x float> %v5, ptr addrspace(1) undef
  store volatile <32 x float> %v6, ptr addrspace(1) undef
  store volatile <32 x float> %v7, ptr addrspace(1) undef
  store volatile <32 x float> %v8, ptr addrspace(1) undef
  store volatile <32 x float> %v9, ptr addrspace(1) undef
  ret void
}

; Two-basic-block variant of the previous kernel: the nine loads stay in the
; entry block and the nine stores move to a separate block reached by an
; unconditional branch. The expected register and scratch figures are the
; same as for the single-block version; on gfx908 the checks also expect a
; global_load_ to appear after a v_accvgpr_write_b32.
; GCN-LABEL: {{^}}max_256_vgprs_spill_9x32_2bb:
; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
; GFX908-NOT: SCRATCH_RSRC
; GFX908: v_accvgpr_write_b32
; GFX908: global_load_
; GFX900: buffer_store_dword v
; GFX900: buffer_load_dword v
; GFX908-NOT: buffer_
; GFX908-DAG: v_accvgpr_read_b32

; GFX900: NumVgprs: 256
; GFX908: NumVgprs: 252
; GFX900: ScratchSize: 132
; GFX908: ScratchSize: 0
; GFX900: VGPRBlocks: 63
; GFX908: VGPRBlocks: 62
; GFX900: NumVGPRsForWavesPerEU: 256
; GFX908: NumVGPRsForWavesPerEU: 252
define amdgpu_kernel void @max_256_vgprs_spill_9x32_2bb(ptr addrspace(1) %p) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %p1 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p, i32 %tid
  %p2 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p1, i32 %tid
  %p3 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p2, i32 %tid
  %p4 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p3, i32 %tid
  %p5 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p4, i32 %tid
  %p6 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p5, i32 %tid
  %p7 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p6, i32 %tid
  %p8 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p7, i32 %tid
  %p9 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p8, i32 %tid
  %v1 = load volatile <32 x float>, ptr addrspace(1) %p1
  %v2 = load volatile <32 x float>, ptr addrspace(1) %p2
  %v3 = load volatile <32 x float>, ptr addrspace(1) %p3
  %v4 = load volatile <32 x float>, ptr addrspace(1) %p4
  %v5 = load volatile <32 x float>, ptr addrspace(1) %p5
  %v6 = load volatile <32 x float>, ptr addrspace(1) %p6
  %v7 = load volatile <32 x float>, ptr addrspace(1) %p7
  %v8 = load volatile <32 x float>, ptr addrspace(1) %p8
  %v9 = load volatile <32 x float>, ptr addrspace(1) %p9
  ; Every loaded vector is live across this branch into the store block.
  br label %st

st:
  store volatile <32 x float> %v1, ptr addrspace(1) undef
  store volatile <32 x float> %v2, ptr addrspace(1) undef
  store volatile <32 x float> %v3, ptr addrspace(1) undef
  store volatile <32 x float> %v4, ptr addrspace(1) undef
  store volatile <32 x float> %v5, ptr addrspace(1) undef
  store volatile <32 x float> %v6, ptr addrspace(1) undef
  store volatile <32 x float> %v7, ptr addrspace(1) undef
  store volatile <32 x float> %v8, ptr addrspace(1) undef
  store volatile <32 x float> %v9, ptr addrspace(1) undef
  ret void
}

; Make sure there's no crash when we have loads from fixed stack
; objects and are processing VGPR spills
;
; Unlike the kernels above, this is an ordinary (non-kernel) function. It
; takes two <32 x float> arguments plus a pointer, and stores both arguments
; alongside seven more loaded vectors. Only the gfx908 output is inspected:
; an accumulator write followed by buffer loads addressed through s[0:3] and
; s32 at offsets 0 and 4.

; GCN-LABEL: {{^}}stack_args_vgpr_spill:
; GFX908: v_accvgpr_write_b32
; GFX908: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32
; GFX908: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4
define void @stack_args_vgpr_spill(<32 x float> %arg0, <32 x float> %arg1, ptr addrspace(1) %p) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %p1 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p, i32 %tid
  %p2 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p1, i32 %tid
  %p3 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p2, i32 %tid
  %p4 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p3, i32 %tid
  %p5 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p4, i32 %tid
  %p6 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p5, i32 %tid
  %p7 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p6, i32 %tid
  %v1 = load volatile <32 x float>, ptr addrspace(1) %p1
  %v2 = load volatile <32 x float>, ptr addrspace(1) %p2
  %v3 = load volatile <32 x float>, ptr addrspace(1) %p3
  %v4 = load volatile <32 x float>, ptr addrspace(1) %p4
  %v5 = load volatile <32 x float>, ptr addrspace(1) %p5
  %v6 = load volatile <32 x float>, ptr addrspace(1) %p6
  %v7 = load volatile <32 x float>, ptr addrspace(1) %p7
  br label %st

st:
  ; The incoming arguments are first used here, after all seven loads.
  store volatile <32 x float> %arg0, ptr addrspace(1) undef
  store volatile <32 x float> %arg1, ptr addrspace(1) undef
  store volatile <32 x float> %v1, ptr addrspace(1) undef
  store volatile <32 x float> %v2, ptr addrspace(1) undef
  store volatile <32 x float> %v3, ptr addrspace(1) undef
  store volatile <32 x float> %v4, ptr addrspace(1) undef
  store volatile <32 x float> %v5, ptr addrspace(1) undef
  store volatile <32 x float> %v6, ptr addrspace(1) undef
  store volatile <32 x float> %v7, ptr addrspace(1) undef
  ret void
}


; Intrinsic used above as the per-function index source.
declare i32 @llvm.amdgcn.workitem.id.x()

; #0 and #2 differ only in the "amdgpu-num-vgpr" value (10 vs 11); #1 sets a
; flat work-group size range of 1,256 and, unlike the other two, omits
; nounwind.
attributes #0 = { nounwind "amdgpu-num-vgpr"="10" }
attributes #1 = { "amdgpu-flat-work-group-size"="1,256" }
attributes #2 = { nounwind "amdgpu-num-vgpr"="11" }