; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX908 %s

; Register-pressure tests for gfx908. Both kernels carry attribute group #0,
; which sets "amdgpu-num-vgpr"="11", and both keep more 32-bit values live
; across an inline asm than that budget allows. The directives look for
; v_accvgpr_write_b32 / v_accvgpr_read_b32 traffic and for the presence or
; absence of buffer_* instructions and scratch resource setup.
;
; VGPR operands are matched with v{{[0-9]+}} so that a two-digit register
; (v10 is within an 11-register budget) is accepted as well.

; First kernel: nine constants are bound to "a" inline-asm operands, then ten
; i32 values are volatile-loaded through a chain of GEPs rooted at %p, passed
; to a ten-operand "v" inline asm, and volatile-stored. The directives require
; an AGPR write from a VGPR and a read of that same AGPR, forbid the
; SCRATCH_RSRC_DWORD0/1 setup before the write, forbid buffer_ instructions
; between and after the matched instructions, and expect a ScratchSize of 0.
;
; NOTE(review): the v_mov_b32_e32 destination is deliberately left as a
; single-digit pattern. That directive also defines V_REG, so widening it may
; bind the capture to a different line; confirm against llc output first.
; GFX908-LABEL: {{^}}max_11_vgprs_used_9a:
; GFX908-NOT: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
; GFX908-NOT: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
; GFX908-DAG: v_accvgpr_write_b32 [[A_REG:a[0-9]+]], v{{[0-9]+}}
; GFX908-NOT: buffer_store_dword v{{[0-9]+}},
; GFX908-NOT: buffer_
; GFX908: v_mov_b32_e32 v{{[0-9]}}, [[V_REG:v[0-9]+]]
; GFX908: v_accvgpr_read_b32 [[V_REG]], [[A_REG]]
; GFX908-NOT: buffer_

; GFX908: NumVgprs: 10
; GFX908: ScratchSize: 0
; GFX908: VGPRBlocks: 2
; GFX908: NumVGPRsForWavesPerEU: 10
define amdgpu_kernel void @max_11_vgprs_used_9a(ptr addrspace(1) %p) #0 {
  ; Volatile load supplies the index used by the first GEP.
  %tid = load volatile i32, ptr addrspace(1) undef
  ; Nine "a"-constrained operands.
  call void asm sideeffect "", "a,a,a,a,a,a,a,a,a"(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
  ; Each pointer is derived from the previous one.
  %p1 = getelementptr inbounds i32, ptr addrspace(1) %p, i32 %tid
  %p2 = getelementptr inbounds i32, ptr addrspace(1) %p1, i32 4
  %p3 = getelementptr inbounds i32, ptr addrspace(1) %p2, i32 8
  %p4 = getelementptr inbounds i32, ptr addrspace(1) %p3, i32 12
  %p5 = getelementptr inbounds i32, ptr addrspace(1) %p4, i32 16
  %p6 = getelementptr inbounds i32, ptr addrspace(1) %p5, i32 20
  %p7 = getelementptr inbounds i32, ptr addrspace(1) %p6, i32 24
  %p8 = getelementptr inbounds i32, ptr addrspace(1) %p7, i32 28
  %p9 = getelementptr inbounds i32, ptr addrspace(1) %p8, i32 32
  %p10 = getelementptr inbounds i32, ptr addrspace(1) %p9, i32 36
  %v1 = load volatile i32, ptr addrspace(1) %p1
  %v2 = load volatile i32, ptr addrspace(1) %p2
  %v3 = load volatile i32, ptr addrspace(1) %p3
  %v4 = load volatile i32, ptr addrspace(1) %p4
  %v5 = load volatile i32, ptr addrspace(1) %p5
  %v6 = load volatile i32, ptr addrspace(1) %p6
  %v7 = load volatile i32, ptr addrspace(1) %p7
  %v8 = load volatile i32, ptr addrspace(1) %p8
  %v9 = load volatile i32, ptr addrspace(1) %p9
  %v10 = load volatile i32, ptr addrspace(1) %p10
  ; All ten loaded values are operands of one "v"-constrained asm.
  call void asm sideeffect "", "v,v,v,v,v,v,v,v,v,v"(i32 %v1, i32 %v2, i32 %v3, i32 %v4, i32 %v5, i32 %v6, i32 %v7, i32 %v8, i32 %v9, i32 %v10)
  store volatile i32 %v1, ptr addrspace(1) undef
  store volatile i32 %v2, ptr addrspace(1) undef
  store volatile i32 %v3, ptr addrspace(1) undef
  store volatile i32 %v4, ptr addrspace(1) undef
  store volatile i32 %v5, ptr addrspace(1) undef
  store volatile i32 %v6, ptr addrspace(1) undef
  store volatile i32 %v7, ptr addrspace(1) undef
  store volatile i32 %v8, ptr addrspace(1) undef
  store volatile i32 %v9, ptr addrspace(1) undef
  store volatile i32 %v10, ptr addrspace(1) undef
  ret void
}

; Second kernel: a single "a" inline-asm operand (constant 1), then five i64
; values are volatile-loaded, passed to a five-operand "v" inline asm, and
; volatile-stored back through the same set of pointers in rotated order.
; The directives expect the scratch resource setup, one buffer_store_dword and
; one buffer_load_dword, an immediate write to a0, VGPR writes to a1..a10,
; reads back from a0..a10, and a ScratchSize of 12.
; GFX908-LABEL: {{^}}max_11_vgprs_used_1a_partial_spill:
; GFX908-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
; GFX908-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
; GFX908-DAG: v_accvgpr_write_b32 a0, 1
; GFX908-DAG: buffer_store_dword v{{[0-9]+}},
; GFX908-DAG: v_accvgpr_write_b32 a1, v{{[0-9]+}}
; GFX908-DAG: v_accvgpr_write_b32 a2, v{{[0-9]+}}
; GFX908-DAG: v_accvgpr_write_b32 a3, v{{[0-9]+}}
; GFX908-DAG: v_accvgpr_write_b32 a4, v{{[0-9]+}}
; GFX908-DAG: v_accvgpr_write_b32 a5, v{{[0-9]+}}
; GFX908-DAG: v_accvgpr_write_b32 a6, v{{[0-9]+}}
; GFX908-DAG: v_accvgpr_write_b32 a7, v{{[0-9]+}}
; GFX908-DAG: v_accvgpr_write_b32 a8, v{{[0-9]+}}
; GFX908-DAG: v_accvgpr_write_b32 a9, v{{[0-9]+}}
; GFX908-DAG: v_accvgpr_write_b32 a10, v{{[0-9]+}}
; GFX908-DAG: buffer_load_dword v{{[0-9]+}},
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]+}}, a0
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]+}}, a1
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]+}}, a2
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]+}}, a3
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]+}}, a4
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]+}}, a5
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]+}}, a6
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]+}}, a7
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]+}}, a8
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]+}}, a9
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]+}}, a10

; GFX908: NumVgprs: 10
; GFX908: ScratchSize: 12
; GFX908: VGPRBlocks: 2
; GFX908: NumVGPRsForWavesPerEU: 11
define amdgpu_kernel void @max_11_vgprs_used_1a_partial_spill(ptr addrspace(1) %p) #0 {
  ; Volatile load supplies the index used by the first GEP.
  %tid = load volatile i32, ptr addrspace(1) undef
  ; One "a"-constrained operand.
  call void asm sideeffect "", "a"(i32 1)
  ; Each pointer is derived from the previous one.
  %p1 = getelementptr inbounds i64, ptr addrspace(1) %p, i32 %tid
  %p2 = getelementptr inbounds i64, ptr addrspace(1) %p1, i32 8
  %p3 = getelementptr inbounds i64, ptr addrspace(1) %p2, i32 16
  %p4 = getelementptr inbounds i64, ptr addrspace(1) %p3, i32 24
  %p5 = getelementptr inbounds i64, ptr addrspace(1) %p4, i32 32
  %v1 = load volatile i64, ptr addrspace(1) %p1
  %v2 = load volatile i64, ptr addrspace(1) %p2
  %v3 = load volatile i64, ptr addrspace(1) %p3
  %v4 = load volatile i64, ptr addrspace(1) %p4
  %v5 = load volatile i64, ptr addrspace(1) %p5
  ; All five i64 values are operands of one "v"-constrained asm.
  call void asm sideeffect "", "v,v,v,v,v"(i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5)
  ; The pointers stay in use after the asm: each value is stored through the
  ; next pointer in the chain, and the last one wraps around to %p1.
  store volatile i64 %v1, ptr addrspace(1) %p2
  store volatile i64 %v2, ptr addrspace(1) %p3
  store volatile i64 %v3, ptr addrspace(1) %p4
  store volatile i64 %v4, ptr addrspace(1) %p5
  store volatile i64 %v5, ptr addrspace(1) %p1
  ret void
}

attributes #0 = { nounwind "amdgpu-num-vgpr"="11" }