xref: /llvm-project/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll (revision c897c13dde3bb413e723317c0579781fb6045a8b)
1; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX908 %s
2; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX90A %s
3
4; GCN-LABEL: {{^}}kernel_32_agprs:
5; GFX908: .amdhsa_next_free_vgpr 32
6; GFX90A: .amdhsa_next_free_vgpr 44
7; GFX90A: .amdhsa_accum_offset 12
8; GCN:    NumVgprs: 9
9; GCN:    NumAgprs: 32
10; GFX908: TotalNumVgprs: 32
11; GFX90A: TotalNumVgprs: 44
12; GFX908: VGPRBlocks: 7
13; GFX90A: VGPRBlocks: 5
14; GFX908: NumVGPRsForWavesPerEU: 32
15; GFX90A: NumVGPRsForWavesPerEU: 44
16; GFX90A: AccumOffset: 12
17; GCN:    Occupancy: 8
18; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 2
; Kernel whose body is nothing but two empty inline-asm statements and a return.
; The asm statements emit no instructions; they only declare registers as
; clobbered. The highest VGPR named is v8 and the highest AGPR named is a31,
; which lines up with the NumVgprs 9 / NumAgprs 32 expectations written before
; this definition.
19define amdgpu_kernel void @kernel_32_agprs() #0 {
20bb:
  ; Mark v8 as clobbered (empty asm string, clobber-only constraint).
21  call void asm sideeffect "", "~{v8}" ()
  ; Mark a31 as clobbered.
22  call void asm sideeffect "", "~{a31}" ()
23  ret void
24}
25
26; GCN-LABEL: {{^}}kernel_0_agprs:
27; GCN:    .amdhsa_next_free_vgpr 1
28; GFX90A: .amdhsa_accum_offset 4
29; GCN:    NumVgprs: 1
30; GCN:    NumAgprs: 0
31; GCN:    TotalNumVgprs: 1
32; GCN:    VGPRBlocks: 0
33; GCN:    NumVGPRsForWavesPerEU: 1
34; GFX90A: AccumOffset: 4
35; GFX908: Occupancy: 10
36; GFX90A: Occupancy: 8
37; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 0
; Kernel that names a single VGPR (v0) as clobbered and names no AGPR at all.
; The expectations before this definition ask for NumVgprs 1 and NumAgprs 0.
38define amdgpu_kernel void @kernel_0_agprs() #0 {
39bb:
  ; Mark v0 as clobbered; the empty asm string emits no instructions.
40  call void asm sideeffect "", "~{v0}" ()
41  ret void
42}
43
44; GCN-LABEL: {{^}}kernel_40_vgprs:
45; GFX908: .amdhsa_next_free_vgpr 40
46; GFX90A: .amdhsa_next_free_vgpr 56
47; GFX90A: .amdhsa_accum_offset 40
48; GCN:    NumVgprs: 40
49; GCN:    NumAgprs: 16
50; GFX908: TotalNumVgprs: 40
51; GFX90A: TotalNumVgprs: 56
52; GFX908: VGPRBlocks: 9
53; GFX90A: VGPRBlocks: 6
54; GFX908: NumVGPRsForWavesPerEU: 40
55; GFX90A: NumVGPRsForWavesPerEU: 56
56; GFX90A: AccumOffset: 40
57; GFX908: Occupancy: 6
58; GFX90A: Occupancy: 8
59; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 9
; Kernel where the VGPR clobber (v39) is numbered higher than the AGPR clobber
; (a15), the reverse of kernel_32_agprs. The expectations before this
; definition ask for NumVgprs 40 and NumAgprs 16.
60define amdgpu_kernel void @kernel_40_vgprs() #0 {
61bb:
  ; Mark v39 as clobbered.
62  call void asm sideeffect "", "~{v39}" ()
  ; Mark a15 as clobbered.
63  call void asm sideeffect "", "~{a15}" ()
64  ret void
65}
66
67; GCN-LABEL: {{^}}func_32_agprs:
68; GCN:    NumVgprs: 9
69; GCN:    NumAgprs: 32
70; GFX908: TotalNumVgprs: 32
71; GFX90A: TotalNumVgprs: 44
; Non-kernel function (no amdgpu_kernel calling convention) with the same
; clobbers as kernel_32_agprs: v8 and a31. It is also the callee used by
; kernel_call_func_32_agprs and func_call_func_32_agprs later in this file.
72define void @func_32_agprs() #0 {
73bb:
  ; Mark v8 as clobbered (empty asm string, clobber-only constraint).
74  call void asm sideeffect "", "~{v8}" ()
  ; Mark a31 as clobbered.
75  call void asm sideeffect "", "~{a31}" ()
76  ret void
77}
78
79; GCN-LABEL: {{^}}func_32_vgprs:
80; GCN:    NumVgprs: 32
81; GCN:    NumAgprs: 9
82; GFX908: TotalNumVgprs: 32
83; GFX90A: TotalNumVgprs: 41
; Non-kernel function with the register roles of func_32_agprs swapped: the
; VGPR clobber is the high one (v31) and the AGPR clobber is the low one (a8).
; The expectations before this definition ask for NumVgprs 32 and NumAgprs 9.
84define void @func_32_vgprs() #0 {
85bb:
  ; Mark v31 as clobbered.
86  call void asm sideeffect "", "~{v31}" ()
  ; Mark a8 as clobbered.
87  call void asm sideeffect "", "~{a8}" ()
88  ret void
89}
90
91; GCN-LABEL: {{^}}func_0_agprs:
92; GCN:    NumVgprs: 1
93; GCN:    NumAgprs: 0
94; GCN:    TotalNumVgprs: 1
; Non-kernel function that names only v0 as clobbered and names no AGPR.
; This is the function-path counterpart of kernel_0_agprs. It was previously
; defined with the amdgpu_kernel calling convention, which made it an exact
; duplicate of kernel_0_agprs and left the zero-AGPR case untested for
; ordinary functions; it now uses the default calling convention like
; func_32_agprs and func_32_vgprs.
95define void @func_0_agprs() #0 {
96bb:
  ; Mark v0 as clobbered; the empty asm string emits no instructions.
97  call void asm sideeffect "", "~{v0}" ()
98  ret void
99}
100
101; GCN-LABEL: {{^}}kernel_max_gprs:
102; GFX908: .amdhsa_next_free_vgpr 256
103; GFX90A: .amdhsa_next_free_vgpr 512
104; GFX90A: .amdhsa_accum_offset 256
105; GCN:    NumVgprs: 256
106; GCN:    NumAgprs: 256
107; GFX908: TotalNumVgprs: 256
108; GFX90A: TotalNumVgprs: 512
109; GFX908: VGPRBlocks: 63
110; GFX90A: VGPRBlocks: 63
111; GFX908: NumVGPRsForWavesPerEU: 256
112; GFX90A: NumVGPRsForWavesPerEU: 512
113; GFX90A: AccumOffset: 256
114; GCN:    Occupancy: 1
115; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 63
; Kernel that names v255 and a255 as clobbered. The expectations before this
; definition ask for NumVgprs 256 and NumAgprs 256, and for an occupancy of 1.
; (Going by the kernel name, 255 is presumably the highest register index of
; each kind on these targets; confirm against the target documentation.)
116define amdgpu_kernel void @kernel_max_gprs() #0 {
117bb:
  ; Mark v255 as clobbered.
118  call void asm sideeffect "", "~{v255}" ()
  ; Mark a255 as clobbered.
119  call void asm sideeffect "", "~{a255}" ()
120  ret void
121}
122
123; GCN-LABEL: {{^}}kernel_call_func_32_agprs:
124; GFX908: .amdhsa_next_free_vgpr 32
125; GFX90A: .amdhsa_accum_offset 12
126; GCN:    NumVgprs: 9
127; GCN:    NumAgprs: 32
128; GFX908: TotalNumVgprs: 32
129; GFX90A: TotalNumVgprs: 44
130; GFX908: VGPRBlocks: 7
131; GFX90A: VGPRBlocks: 5
132; GFX908: NumVGPRsForWavesPerEU: 32
133; GFX90A: NumVGPRsForWavesPerEU: 44
134; GFX90A: AccumOffset: 12
135; GCN:    Occupancy: 8
136; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 2
; Kernel that names no registers itself; its only statement besides the return
; is a call to func_32_agprs, which is defined earlier in this file. The
; expectations before this definition carry the same NumVgprs 9 / NumAgprs 32
; figures as those written for func_32_agprs.
137define amdgpu_kernel void @kernel_call_func_32_agprs() #0 {
138bb:
  ; Direct call to a callee whose body is visible in this module.
139  call void @func_32_agprs() #0
140  ret void
141}
142
143; GCN-LABEL: {{^}}func_call_func_32_agprs:
144; GCN:    NumVgprs: 9
145; GCN:    NumAgprs: 32
146; GFX908: TotalNumVgprs: 32
147; GFX90A: TotalNumVgprs: 44
; Non-kernel variant of kernel_call_func_32_agprs: a plain function whose only
; statement besides the return is a call to func_32_agprs. The expectations
; before this definition repeat the NumVgprs 9 / NumAgprs 32 figures written
; for the callee.
148define void @func_call_func_32_agprs() #0 {
149bb:
  ; Direct call to a callee whose body is visible in this module.
150  call void @func_32_agprs() #0
151  ret void
152}
153
; External function with no body in this file and no attribute group attached.
; It is the callee of kernel_call_undef_func below.
154declare void @undef_func()
155
156; GCN-LABEL: {{^}}kernel_call_undef_func:
157; GCN:    .amdhsa_next_free_vgpr max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0)
158; GFX90A: .amdhsa_accum_offset ((((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63)+1)*4
159; GCN:    .set kernel_call_undef_func.num_vgpr, max(32, amdgpu.max_num_vgpr)
160; GCN:    .set kernel_call_undef_func.num_agpr, max(0, amdgpu.max_num_agpr)
161; GCN:    NumVgprs: kernel_call_undef_func.num_vgpr
162; GCN:    NumAgprs: kernel_call_undef_func.num_agpr
163; GCN:    TotalNumVgprs: totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr)
164; GFX908: VGPRBlocks: ((alignto(max(max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0), 1), 4))/4)-1
165; GFX90A: VGPRBlocks: ((alignto(max(max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0), 1), 8))/8)-1
166; GCN:    NumVGPRsForWavesPerEU: max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0)
167; GFX90A: AccumOffset: ((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4))/4)-1)+1)*4
168; GFX908: Occupancy: occupancy(10, 4, 256, 8, 10, max(kernel_call_undef_func.numbered_sgpr+(extrasgprs(kernel_call_undef_func.uses_vcc, kernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0))
169; GFX90A: Occupancy: occupancy(8, 8, 512, 8, 8, max(kernel_call_undef_func.numbered_sgpr+(extrasgprs(kernel_call_undef_func.uses_vcc, kernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0))
170; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: ((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63
; Kernel whose only statement besides the return is a call to undef_func, which
; is declared but not defined in this file. The expectations before this
; definition are therefore written as symbolic expressions over
; kernel_call_undef_func.num_vgpr and kernel_call_undef_func.num_agpr, which
; the expected .set lines define in terms of amdgpu.max_num_vgpr and
; amdgpu.max_num_agpr, rather than as literal register counts.
171define amdgpu_kernel void @kernel_call_undef_func() #0 {
172bb:
  ; Unlike the calls to func_32_agprs elsewhere in this file, this call site
  ; does not carry the #0 attribute group.
173  call void @undef_func()
174  ret void
175}
176
177; GCN:      .set amdgpu.max_num_vgpr, 32
178; GCN-NEXT: .set amdgpu.max_num_agpr, 32
179; GCN-NEXT: .set amdgpu.max_num_sgpr, 34
180
; Attribute group applied to every function defined in this file and to the
; call sites of func_32_agprs. It combines nounwind and noinline with an
; "amdgpu-flat-work-group-size" of "1,512" and the three
; "amdgpu-no-workitem-id-{x,y,z}" markers.
; NOTE(review): the no-workitem-id markers presumably keep work-item ID inputs
; from occupying registers, so that the counts come only from the inline-asm
; clobbers; confirm against the AMDGPU usage documentation.
181attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
182