; xref: /llvm-project/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll (revision 6206f5444fc0732e6495703c75a67f1f90f5b418)
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=si-pre-allocate-wwm-regs -o %t.mir %s
; RUN: llc -run-pass=none -verify-machineinstrs %t.mir -o - | FileCheck %s

; Test that SIMachineFunctionInfo can be round trip serialized through
; MIR.

; Array in addrspace(3) referenced by @kernel. 512 floats of 4 bytes each is
; 2048 bytes, which is the ldsSize value the @kernel test expects.
@lds = addrspace(3) global [512 x float] undef, align 4

; Test case: amdgpu_kernel entry point that stores into @lds.
; The NEXT-chained expectations below pin every machineFunctionInfo field, in
; printed order, from the start of the section up to the "body:" key, so any
; added, removed or reordered field makes this test fail.
; Expected values that follow directly from the IR in this file:
;   - ldsSize 2048 = [512 x float] * 4 bytes (@lds).
;   - isEntryFunction is true for this entry calling convention.
; NOTE(review): explicitKernArgSize 128 / maxKernArgAlign 64 presumably come
; from the <16 x i32> argument (64 bytes) being placed at offset 64 -- confirm
; against the AMDGPU kernarg layout rules.
; CHECK-LABEL: {{^}}name: kernel
; CHECK: machineFunctionInfo:
; CHECK-NEXT: explicitKernArgSize: 128
; CHECK-NEXT: maxKernArgAlign: 64
; CHECK-NEXT: ldsSize: 2048
; CHECK-NEXT: gdsSize: 0
; CHECK-NEXT: dynLDSAlign: 1
; CHECK-NEXT: isEntryFunction: true
; CHECK-NEXT: isChainFunction: false
; CHECK-NEXT: noSignedZerosFPMath: false
; CHECK-NEXT: memoryBound: false
; CHECK-NEXT: waveLimiter: false
; CHECK-NEXT: hasSpilledSGPRs: false
; CHECK-NEXT: hasSpilledVGPRs: false
; CHECK-NEXT: scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
; CHECK-NEXT: frameOffsetReg:  '$fp_reg'
; CHECK-NEXT: stackPtrOffsetReg: '$sgpr32'
; CHECK-NEXT: bytesInStackArgArea: 0
; CHECK-NEXT: returnsVoid: true
; CHECK-NEXT: argumentInfo:
; CHECK-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
; CHECK-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
; CHECK-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
; CHECK-NEXT: kernargSegmentPtr: { reg: '$sgpr8_sgpr9' }
; CHECK-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
; CHECK-NEXT: workGroupIDX: { reg: '$sgpr12' }
; CHECK-NEXT: workGroupIDY: { reg: '$sgpr13' }
; CHECK-NEXT: workGroupIDZ: { reg: '$sgpr14' }
; CHECK-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr15' }
; CHECK-NEXT: workItemIDX: { reg: '$vgpr0' }
; CHECK-NEXT: workItemIDY: { reg: '$vgpr1' }
; CHECK-NEXT: workItemIDZ: { reg: '$vgpr2' }
; CHECK-NEXT: psInputAddr: 0
; CHECK-NEXT: psInputEnable: 0
; CHECK-NEXT: maxMemoryClusterDWords: 8
; CHECK-NEXT: mode:
; CHECK-NEXT: ieee: true
; CHECK-NEXT: dx10-clamp: true
; CHECK-NEXT: fp32-input-denormals: true
; CHECK-NEXT: fp32-output-denormals: true
; CHECK-NEXT: fp64-fp16-input-denormals: true
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 8
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: ''
; CHECK-NEXT: hasInitWholeWave: false
; CHECK-NEXT: body:
define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
  ; Index @lds by %arg0 and store a constant so the global is actually used.
  %gep = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %arg0
  store float 0.0, ptr addrspace(3) %gep, align 4
  ret void
}

; Array in addrspace(2) referenced by @ps_shader and
; @ps_shader_ps_input_enable. 128 i32 values of 4 bytes each is 512 bytes,
; which is the gdsSize value the @ps_shader test expects.
@gds = addrspace(2) global [128 x i32] undef, align 4

; Test case: amdgpu_ps entry point that performs an atomic add on @gds.
; As with @kernel, the NEXT-chained expectations pin the complete
; machineFunctionInfo section in printed order up to "body:".
; Expected values that follow directly from the IR in this file:
;   - gdsSize 512 = [128 x i32] * 4 bytes (@gds); ldsSize is 0 because no
;     addrspace(3) global is referenced here.
; The expectations also differ from @kernel in ieee (false), occupancy (10),
; psInputAddr/psInputEnable (1) and the argumentInfo entries.
; CHECK-LABEL: {{^}}name: ps_shader
; CHECK: machineFunctionInfo:
; CHECK-NEXT: explicitKernArgSize: 0
; CHECK-NEXT: maxKernArgAlign: 4
; CHECK-NEXT: ldsSize: 0
; CHECK-NEXT: gdsSize: 512
; CHECK-NEXT: dynLDSAlign: 1
; CHECK-NEXT: isEntryFunction: true
; CHECK-NEXT: isChainFunction: false
; CHECK-NEXT: noSignedZerosFPMath: false
; CHECK-NEXT: memoryBound: false
; CHECK-NEXT: waveLimiter: false
; CHECK-NEXT: hasSpilledSGPRs: false
; CHECK-NEXT: hasSpilledVGPRs: false
; CHECK-NEXT: scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
; CHECK-NEXT: frameOffsetReg:  '$fp_reg'
; CHECK-NEXT: stackPtrOffsetReg: '$sgpr32'
; CHECK-NEXT: bytesInStackArgArea: 0
; CHECK-NEXT: returnsVoid: true
; CHECK-NEXT: argumentInfo:
; CHECK-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr3' }
; CHECK-NEXT: implicitBufferPtr: { reg: '$sgpr0_sgpr1' }
; CHECK-NEXT: psInputAddr: 1
; CHECK-NEXT: psInputEnable: 1
; CHECK-NEXT: maxMemoryClusterDWords: 8
; CHECK-NEXT: mode:
; CHECK-NEXT: ieee: false
; CHECK-NEXT: dx10-clamp: true
; CHECK-NEXT: fp32-input-denormals: true
; CHECK-NEXT: fp32-output-denormals: true
; CHECK-NEXT: fp64-fp16-input-denormals: true
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 10
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: ''
; CHECK-NEXT: hasInitWholeWave: false
; CHECK-NEXT: body:
define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
  ; Index @gds by %arg0 and atomically add 8 so the global is actually used.
  %gep = getelementptr inbounds [128 x i32], ptr addrspace(2) @gds, i32 0, i32 %arg0
  atomicrmw add ptr addrspace(2) %gep, i32 8 seq_cst
  ret void
}

; Test case: same body as @ps_shader, but with attribute group #7
; ("InitialPSInputAddr"="36983"). Only the two PS input fields are verified:
; psInputAddr must carry the attribute value, while psInputEnable must be
; exactly 1 -- the trailing {{$}} anchors the match at end of line so a longer
; number starting with 1 cannot satisfy it.
; CHECK-LABEL: {{^}}name: ps_shader_ps_input_enable
; CHECK: machineFunctionInfo:
; CHECK: psInputAddr: 36983
; CHECK-NEXT: psInputEnable: 1{{$}}
define amdgpu_ps void @ps_shader_ps_input_enable(i32 %arg0, i32 inreg %arg1) #7 {
  %gep = getelementptr inbounds [128 x i32], ptr addrspace(2) @gds, i32 0, i32 %arg0
  atomicrmw add ptr addrspace(2) %gep, i32 8 seq_cst
  ret void
}

; Test case: empty amdgpu_ps function with attribute group #5
; ("amdgpu-gds-size"="4096"). It references no global, so the expected
; gdsSize of 4096 can only come from the attribute.
; CHECK-LABEL: {{^}}name: gds_size_shader
; CHECK: gdsSize: 4096
define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 {
  ret void
}

; Test case: empty function with the default calling convention and no
; attributes. The NEXT-chained expectations pin the complete
; machineFunctionInfo section in printed order up to "body:".
; Notable differences from the entry-point tests above:
;   - isEntryFunction is false.
;   - frameOffsetReg is '$sgpr33' and scratchRSrcReg is
;     '$sgpr0_sgpr1_sgpr2_sgpr3'.
;   - all three work-item IDs share '$vgpr31' and are told apart by mask:
;       1023       = 0x000003FF (bits 0-9)
;       1047552    = 0x000FFC00 (bits 10-19)
;       1072693248 = 0x3FF00000 (bits 20-29)
; CHECK-LABEL: {{^}}name: function
; CHECK: machineFunctionInfo:
; CHECK-NEXT: explicitKernArgSize: 0
; CHECK-NEXT: maxKernArgAlign: 1
; CHECK-NEXT: ldsSize: 0
; CHECK-NEXT: gdsSize: 0
; CHECK-NEXT: dynLDSAlign: 1
; CHECK-NEXT: isEntryFunction: false
; CHECK-NEXT: isChainFunction: false
; CHECK-NEXT: noSignedZerosFPMath: false
; CHECK-NEXT: memoryBound: false
; CHECK-NEXT: waveLimiter: false
; CHECK-NEXT: hasSpilledSGPRs: false
; CHECK-NEXT: hasSpilledVGPRs: false
; CHECK-NEXT: scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
; CHECK-NEXT: frameOffsetReg: '$sgpr33'
; CHECK-NEXT: stackPtrOffsetReg: '$sgpr32'
; CHECK-NEXT: bytesInStackArgArea: 0
; CHECK-NEXT: returnsVoid: true
; CHECK-NEXT: argumentInfo:
; CHECK-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
; CHECK-NEXT: dispatchPtr:     { reg: '$sgpr4_sgpr5' }
; CHECK-NEXT: queuePtr:        { reg: '$sgpr6_sgpr7' }
; CHECK-NEXT: dispatchID:      { reg: '$sgpr10_sgpr11' }
; CHECK-NEXT: workGroupIDX:    { reg: '$sgpr12' }
; CHECK-NEXT: workGroupIDY:    { reg: '$sgpr13' }
; CHECK-NEXT: workGroupIDZ:    { reg: '$sgpr14' }
; CHECK-NEXT: LDSKernelId:     { reg: '$sgpr15' }
; CHECK-NEXT: implicitArgPtr:  { reg: '$sgpr8_sgpr9' }
; CHECK-NEXT: workItemIDX:     { reg: '$vgpr31', mask: 1023 }
; CHECK-NEXT: workItemIDY:     { reg: '$vgpr31', mask: 1047552 }
; CHECK-NEXT: workItemIDZ:     { reg: '$vgpr31', mask: 1072693248 }
; CHECK-NEXT: psInputAddr: 0
; CHECK-NEXT: psInputEnable: 0
; CHECK-NEXT: maxMemoryClusterDWords: 8
; CHECK-NEXT: mode:
; CHECK-NEXT: ieee: true
; CHECK-NEXT: dx10-clamp: true
; CHECK-NEXT: fp32-input-denormals: true
; CHECK-NEXT: fp32-output-denormals: true
; CHECK-NEXT: fp64-fp16-input-denormals: true
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 10
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: ''
; CHECK-NEXT: hasInitWholeWave: false
; CHECK-NEXT: body:
define void @function() {
  ret void
}

; Test case: empty function with attribute group #0
; ("no-signed-zeros-fp-math" = "true"). The expected machineFunctionInfo is
; identical to the one for @function except that noSignedZerosFPMath is true.
; The full section is still pinned field by field so the attribute is shown
; not to disturb anything else.
; CHECK-LABEL: {{^}}name: function_nsz
; CHECK: machineFunctionInfo:
; CHECK-NEXT: explicitKernArgSize: 0
; CHECK-NEXT: maxKernArgAlign: 1
; CHECK-NEXT: ldsSize: 0
; CHECK-NEXT: gdsSize: 0
; CHECK-NEXT: dynLDSAlign: 1
; CHECK-NEXT: isEntryFunction: false
; CHECK-NEXT: isChainFunction: false
; CHECK-NEXT: noSignedZerosFPMath: true
; CHECK-NEXT: memoryBound: false
; CHECK-NEXT: waveLimiter: false
; CHECK-NEXT: hasSpilledSGPRs: false
; CHECK-NEXT: hasSpilledVGPRs: false
; CHECK-NEXT: scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
; CHECK-NEXT: frameOffsetReg: '$sgpr33'
; CHECK-NEXT: stackPtrOffsetReg: '$sgpr32'
; CHECK-NEXT: bytesInStackArgArea: 0
; CHECK-NEXT: returnsVoid: true
; CHECK-NEXT: argumentInfo:
; CHECK-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
; CHECK-NEXT: dispatchPtr:     { reg: '$sgpr4_sgpr5' }
; CHECK-NEXT: queuePtr:        { reg: '$sgpr6_sgpr7' }
; CHECK-NEXT: dispatchID:      { reg: '$sgpr10_sgpr11' }
; CHECK-NEXT: workGroupIDX:    { reg: '$sgpr12' }
; CHECK-NEXT: workGroupIDY:    { reg: '$sgpr13' }
; CHECK-NEXT: workGroupIDZ:    { reg: '$sgpr14' }
; CHECK-NEXT: LDSKernelId:     { reg: '$sgpr15' }
; CHECK-NEXT: implicitArgPtr:  { reg: '$sgpr8_sgpr9' }
; CHECK-NEXT: workItemIDX:     { reg: '$vgpr31', mask: 1023 }
; CHECK-NEXT: workItemIDY:     { reg: '$vgpr31', mask: 1047552 }
; CHECK-NEXT: workItemIDZ:     { reg: '$vgpr31', mask: 1072693248 }
; CHECK-NEXT: psInputAddr: 0
; CHECK-NEXT: psInputEnable: 0
; CHECK-NEXT: maxMemoryClusterDWords: 8
; CHECK-NEXT: mode:
; CHECK-NEXT: ieee: true
; CHECK-NEXT: dx10-clamp: true
; CHECK-NEXT: fp32-input-denormals: true
; CHECK-NEXT: fp32-output-denormals: true
; CHECK-NEXT: fp64-fp16-input-denormals: true
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 10
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: ''
; CHECK-NEXT: hasInitWholeWave: false
; CHECK-NEXT: body:
define void @function_nsz() #0 {
  ret void
}

; Test case: empty function with attribute group #1
; ("amdgpu-dx10-clamp" = "false"). Only the "mode:" sub-section is verified:
; dx10-clamp must be false while ieee and all denormal flags stay true.
; CHECK-LABEL: {{^}}name: function_dx10_clamp_off
; CHECK: mode:
; CHECK-NEXT: ieee: true
; CHECK-NEXT: dx10-clamp: false
; CHECK-NEXT: fp32-input-denormals: true
; CHECK-NEXT: fp32-output-denormals: true
; CHECK-NEXT: fp64-fp16-input-denormals: true
; CHECK-NEXT: fp64-fp16-output-denormals: true
define void @function_dx10_clamp_off() #1 {
  ret void
}

; Test case: empty function with attribute group #2
; ("amdgpu-ieee" = "false"). Only the "mode:" sub-section is verified:
; ieee must be false while dx10-clamp and all denormal flags stay true.
; CHECK-LABEL: {{^}}name: function_ieee_off
; CHECK: mode:
; CHECK-NEXT: ieee: false
; CHECK-NEXT: dx10-clamp: true
; CHECK-NEXT: fp32-input-denormals: true
; CHECK-NEXT: fp32-output-denormals: true
; CHECK-NEXT: fp64-fp16-input-denormals: true
; CHECK-NEXT: fp64-fp16-output-denormals: true
define void @function_ieee_off() #2 {
  ret void
}

; Test case: empty function with attribute group #3, which sets both
; "amdgpu-dx10-clamp" and "amdgpu-ieee" to "false". Only the "mode:"
; sub-section is verified: both flags must be false while all denormal flags
; stay true.
; CHECK-LABEL: {{^}}name: function_ieee_off_dx10_clamp_off
; CHECK: mode:
; CHECK-NEXT: ieee: false
; CHECK-NEXT: dx10-clamp: false
; CHECK-NEXT: fp32-input-denormals: true
; CHECK-NEXT: fp32-output-denormals: true
; CHECK-NEXT: fp64-fp16-input-denormals: true
; CHECK-NEXT: fp64-fp16-output-denormals: true
define void @function_ieee_off_dx10_clamp_off() #3 {
  ret void
}

; Test case: empty amdgpu_ps function with attribute group #4
; ("amdgpu-32bit-address-high-bits"="0xffff8000"). The field is printed in
; decimal: 0xffff8000 == 4294934528.
; CHECK-LABEL: {{^}}name: high_address_bits
; CHECK: machineFunctionInfo:
; CHECK: highBitsOf32BitAddress: 4294934528
define amdgpu_ps void @high_address_bits() #4 {
  ret void
}

; Test case: amdgpu_cs function that builds two set.inactive -> strict.wwm
; chains. The expectations require the serialized wwmReservedRegs list to
; contain '$vgpr2' immediately followed by '$vgpr3'.
; NOTE(review): the first lit command line stops after
; si-pre-allocate-wwm-regs, which is presumably the pass that fills this
; list -- confirm against the pass implementation.
; CHECK-LABEL: {{^}}name: wwm_reserved_regs
; CHECK: wwmReservedRegs:
; CHECK-NEXT: - '$vgpr2'
; CHECK-NEXT: - '$vgpr3'
define amdgpu_cs void @wwm_reserved_regs(ptr addrspace(1) %ptr, <4 x i32> inreg %tmp14) {
  ; Volatile loads give two distinct values even though the pointer is shared.
  %ld0 = load volatile i32, ptr addrspace(1) %ptr
  %ld1 = load volatile i32, ptr addrspace(1) %ptr
  ; Note the crossed pairing: %inactive0 consumes %ld1, %inactive1 consumes %ld0.
  %inactive0 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %ld1, i32 0)
  %inactive1 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %ld0, i32 0)
  %wwm0 = tail call i32 @llvm.amdgcn.strict.wwm.i32(i32 %inactive0)
  %wwm1 = tail call i32 @llvm.amdgcn.strict.wwm.i32(i32 %inactive1)
  ; Volatile stores keep both results live.
  store volatile i32 %wwm0, ptr addrspace(1) %ptr
  store volatile i32 %wwm1, ptr addrspace(1) %ptr
  ret void
}

; Intrinsics called by @wwm_reserved_regs; both carry attribute group #6.
declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #6
declare i32 @llvm.amdgcn.strict.wwm.i32(i32) #6

; Attribute groups, listed with the function(s) in this file that use them:
;   #0 - @function_nsz
;   #1 - @function_dx10_clamp_off
;   #2 - @function_ieee_off
;   #3 - @function_ieee_off_dx10_clamp_off
;   #4 - @high_address_bits
;   #5 - @gds_size_shader
;   #6 - the llvm.amdgcn.* intrinsic declarations
;   #7 - @ps_shader_ps_input_enable
attributes #0 = { "no-signed-zeros-fp-math" = "true" }
attributes #1 = { "amdgpu-dx10-clamp" = "false" }
attributes #2 = { "amdgpu-ieee" = "false" }
attributes #3 = { "amdgpu-dx10-clamp" = "false" "amdgpu-ieee" = "false" }
attributes #4 = { "amdgpu-32bit-address-high-bits"="0xffff8000" }
attributes #5 = { "amdgpu-gds-size"="4096" }
attributes #6 = { convergent nounwind readnone willreturn }
attributes #7 = { "InitialPSInputAddr"="36983" }
