; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX908 %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX90A %s

; Checks the VGPR/AGPR register counts and the values derived from them that
; llc reports for kernels and functions, on gfx908 and on gfx90a.
;
; Each function forces a specific highest-used register by clobbering it from
; an empty inline asm statement ("~{vN}" for VGPRs, "~{aN}" for AGPRs).
;
; The expected values below are consistent with these relations:
;   * gfx908: TotalNumVgprs is the larger of NumVgprs and NumAgprs, and
;     VGPRBlocks uses a granule of 4 registers.
;   * gfx90a: TotalNumVgprs is NumVgprs rounded up to a multiple of 4, plus
;     NumAgprs; VGPRBlocks uses a granule of 8 registers; the encoded
;     ACCUM_OFFSET field is (accum_offset / 4) - 1.

; Kernel using v0-v8 and a0-a31.
; GCN-LABEL: {{^}}kernel_32_agprs:
; GFX908: .amdhsa_next_free_vgpr 32
; GFX90A: .amdhsa_next_free_vgpr 44
; GFX90A: .amdhsa_accum_offset 12
; GCN: NumVgprs: 9
; GCN: NumAgprs: 32
; GFX908: TotalNumVgprs: 32
; GFX90A: TotalNumVgprs: 44
; GFX908: VGPRBlocks: 7
; GFX90A: VGPRBlocks: 5
; GFX908: NumVGPRsForWavesPerEU: 32
; GFX90A: NumVGPRsForWavesPerEU: 44
; GFX90A: AccumOffset: 12
; GCN: Occupancy: 8
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 2
define amdgpu_kernel void @kernel_32_agprs() #0 {
bb:
  call void asm sideeffect "", "~{v8}" ()
  call void asm sideeffect "", "~{a31}" ()
  ret void
}

; Kernel using only v0 and no AGPRs.
; GCN-LABEL: {{^}}kernel_0_agprs:
; GCN: .amdhsa_next_free_vgpr 1
; GFX90A: .amdhsa_accum_offset 4
; GCN: NumVgprs: 1
; GCN: NumAgprs: 0
; GCN: TotalNumVgprs: 1
; GCN: VGPRBlocks: 0
; GCN: NumVGPRsForWavesPerEU: 1
; GFX90A: AccumOffset: 4
; GFX908: Occupancy: 10
; GFX90A: Occupancy: 8
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 0
define amdgpu_kernel void @kernel_0_agprs() #0 {
bb:
  call void asm sideeffect "", "~{v0}" ()
  ret void
}

; Kernel using more VGPRs (v0-v39) than AGPRs (a0-a15).
; GCN-LABEL: {{^}}kernel_40_vgprs:
; GFX908: .amdhsa_next_free_vgpr 40
; GFX90A: .amdhsa_next_free_vgpr 56
; GFX90A: .amdhsa_accum_offset 40
; GCN: NumVgprs: 40
; GCN: NumAgprs: 16
; GFX908: TotalNumVgprs: 40
; GFX90A: TotalNumVgprs: 56
; GFX908: VGPRBlocks: 9
; GFX90A: VGPRBlocks: 6
; GFX908: NumVGPRsForWavesPerEU: 40
; GFX90A: NumVGPRsForWavesPerEU: 56
; GFX90A: AccumOffset: 40
; GFX908: Occupancy: 6
; GFX90A: Occupancy: 8
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 9
define amdgpu_kernel void @kernel_40_vgprs() #0 {
bb:
  call void asm sideeffect "", "~{v39}" ()
  call void asm sideeffect "", "~{a15}" ()
  ret void
}

; Non-kernel function with the same register usage as kernel_32_agprs.
; GCN-LABEL: {{^}}func_32_agprs:
; GCN: NumVgprs: 9
; GCN: NumAgprs: 32
; GFX908: TotalNumVgprs: 32
; GFX90A: TotalNumVgprs: 44
define void @func_32_agprs() #0 {
bb:
  call void asm sideeffect "", "~{v8}" ()
  call void asm sideeffect "", "~{a31}" ()
  ret void
}

; Non-kernel function using v0-v31 and a0-a8 (the mirror of func_32_agprs).
; GCN-LABEL: {{^}}func_32_vgprs:
; GCN: NumVgprs: 32
; GCN: NumAgprs: 9
; GFX908: TotalNumVgprs: 32
; GFX90A: TotalNumVgprs: 41
define void @func_32_vgprs() #0 {
bb:
  call void asm sideeffect "", "~{v31}" ()
  call void asm sideeffect "", "~{a8}" ()
  ret void
}

; Uses only v0 and no AGPRs.
; NOTE(review): despite the func_ prefix this is declared amdgpu_kernel, unlike
; the other func_* definitions in this file - confirm that this is intended.
; GCN-LABEL: {{^}}func_0_agprs:
; GCN: NumVgprs: 1
; GCN: NumAgprs: 0
; GCN: TotalNumVgprs: 1
define amdgpu_kernel void @func_0_agprs() #0 {
bb:
  call void asm sideeffect "", "~{v0}" ()
  ret void
}

; Kernel touching the highest-numbered register of each file (v255, a255).
; GCN-LABEL: {{^}}kernel_max_gprs:
; GFX908: .amdhsa_next_free_vgpr 256
; GFX90A: .amdhsa_next_free_vgpr 512
; GFX90A: .amdhsa_accum_offset 256
; GCN: NumVgprs: 256
; GCN: NumAgprs: 256
; GFX908: TotalNumVgprs: 256
; GFX90A: TotalNumVgprs: 512
; GFX908: VGPRBlocks: 63
; GFX90A: VGPRBlocks: 63
; GFX908: NumVGPRsForWavesPerEU: 256
; GFX90A: NumVGPRsForWavesPerEU: 512
; GFX90A: AccumOffset: 256
; GCN: Occupancy: 1
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 63
define amdgpu_kernel void @kernel_max_gprs() #0 {
bb:
  call void asm sideeffect "", "~{v255}" ()
  call void asm sideeffect "", "~{a255}" ()
  ret void
}

; Kernel with no register clobbers of its own; it only calls func_32_agprs and
; is expected to report the same counts as kernel_32_agprs.
; GCN-LABEL: {{^}}kernel_call_func_32_agprs:
; GFX908: .amdhsa_next_free_vgpr 32
; GFX90A: .amdhsa_accum_offset 12
; GCN: NumVgprs: 9
; GCN: NumAgprs: 32
; GFX908: TotalNumVgprs: 32
; GFX90A: TotalNumVgprs: 44
; GFX908: VGPRBlocks: 7
; GFX90A: VGPRBlocks: 5
; GFX908: NumVGPRsForWavesPerEU: 32
; GFX90A: NumVGPRsForWavesPerEU: 44
; GFX90A: AccumOffset: 12
; GCN: Occupancy: 8
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 2
define amdgpu_kernel void @kernel_call_func_32_agprs() #0 {
bb:
  call void @func_32_agprs() #0
  ret void
}

; Non-kernel function that only calls func_32_agprs; expected to report the
; callee's counts.
; GCN-LABEL: {{^}}func_call_func_32_agprs:
; GCN: NumVgprs: 9
; GCN: NumAgprs: 32
; GFX908: TotalNumVgprs: 32
; GFX90A: TotalNumVgprs: 44
define void @func_call_func_32_agprs() #0 {
bb:
  call void @func_32_agprs() #0
  ret void
}

; External callee with no body in this module.
declare void @undef_func()

; Kernel calling the external function. The counts are expected to be printed
; as symbolic expressions over kernel_call_undef_func.num_vgpr/.num_agpr, which
; are themselves defined in terms of the module-wide amdgpu.max_num_* symbols.
; GCN-LABEL: {{^}}kernel_call_undef_func:
; GCN: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0)
; GFX90A: .amdhsa_accum_offset ((((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63)+1)*4
; GCN: .set kernel_call_undef_func.num_vgpr, max(32, amdgpu.max_num_vgpr)
; GCN: .set kernel_call_undef_func.num_agpr, max(0, amdgpu.max_num_agpr)
; GCN: NumVgprs: kernel_call_undef_func.num_vgpr
; GCN: NumAgprs: kernel_call_undef_func.num_agpr
; GCN: TotalNumVgprs: totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr)
; GFX908: VGPRBlocks: ((alignto(max(max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0), 1), 4))/4)-1
; GFX90A: VGPRBlocks: ((alignto(max(max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0), 1), 8))/8)-1
; GCN: NumVGPRsForWavesPerEU: max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0)
; GFX90A: AccumOffset: ((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4))/4)-1)+1)*4
; GFX908: Occupancy: occupancy(10, 4, 256, 8, 10, max(kernel_call_undef_func.numbered_sgpr+(extrasgprs(kernel_call_undef_func.uses_vcc, kernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0))
; GFX90A: Occupancy: occupancy(8, 8, 512, 8, 8, max(kernel_call_undef_func.numbered_sgpr+(extrasgprs(kernel_call_undef_func.uses_vcc, kernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0))
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: ((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63
define amdgpu_kernel void @kernel_call_undef_func() #0 {
bb:
  call void @undef_func()
  ret void
}

; Module-wide maxima, expected on three consecutive lines after the last
; function.
; GCN: .set amdgpu.max_num_vgpr, 32
; GCN-NEXT: .set amdgpu.max_num_agpr, 32
; GCN-NEXT: .set amdgpu.max_num_sgpr, 34

attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }