; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=si-pre-allocate-wwm-regs -o %t.mir %s
; RUN: llc -run-pass=none -verify-machineinstrs %t.mir -o - | FileCheck %s

; Verify that SIMachineFunctionInfo survives a round trip through MIR
; serialization. The first llc invocation stops after
; si-pre-allocate-wwm-regs and writes MIR to a temporary file; the second
; one re-reads that file without running any pass and prints it again, and
; the assertions below are matched against the re-printed
; machineFunctionInfo blocks.

@lds = addrspace(3) global [512 x float] undef, align 4

; Entry kernel that stores into @lds. The whole machineFunctionInfo block is
; matched field by field. The expected ldsSize of 2048 matches the size of
; the 512 x float array (512 * 4 bytes).
; CHECK-LABEL: {{^}}name: kernel
; CHECK: machineFunctionInfo:
; CHECK-NEXT: explicitKernArgSize: 128
; CHECK-NEXT: maxKernArgAlign: 64
; CHECK-NEXT: ldsSize: 2048
; CHECK-NEXT: gdsSize: 0
; CHECK-NEXT: dynLDSAlign: 1
; CHECK-NEXT: isEntryFunction: true
; CHECK-NEXT: isChainFunction: false
; CHECK-NEXT: noSignedZerosFPMath: false
; CHECK-NEXT: memoryBound: false
; CHECK-NEXT: waveLimiter: false
; CHECK-NEXT: hasSpilledSGPRs: false
; CHECK-NEXT: hasSpilledVGPRs: false
; CHECK-NEXT: scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
; CHECK-NEXT: frameOffsetReg: '$fp_reg'
; CHECK-NEXT: stackPtrOffsetReg: '$sgpr32'
; CHECK-NEXT: bytesInStackArgArea: 0
; CHECK-NEXT: returnsVoid: true
; CHECK-NEXT: argumentInfo:
; CHECK-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
; CHECK-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
; CHECK-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
; CHECK-NEXT: kernargSegmentPtr: { reg: '$sgpr8_sgpr9' }
; CHECK-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
; CHECK-NEXT: workGroupIDX: { reg: '$sgpr12' }
; CHECK-NEXT: workGroupIDY: { reg: '$sgpr13' }
; CHECK-NEXT: workGroupIDZ: { reg: '$sgpr14' }
; CHECK-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr15' }
; CHECK-NEXT: workItemIDX: { reg: '$vgpr0' }
; CHECK-NEXT: workItemIDY: { reg: '$vgpr1' }
; CHECK-NEXT: workItemIDZ: { reg: '$vgpr2' }
; CHECK-NEXT: psInputAddr: 0
; CHECK-NEXT: psInputEnable: 0
; CHECK-NEXT: maxMemoryClusterDWords: 8
; CHECK-NEXT: mode:
; CHECK-NEXT: ieee: true
; CHECK-NEXT: dx10-clamp: true
; CHECK-NEXT: fp32-input-denormals: true
; CHECK-NEXT: fp32-output-denormals: true
; CHECK-NEXT: fp64-fp16-input-denormals: true
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 8
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: ''
; CHECK-NEXT: hasInitWholeWave: false
; CHECK-NEXT: body:
define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
  %lds.slot = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %arg0
  store float 0.000000e+00, ptr addrspace(3) %lds.slot, align 4
  ret void
}

@gds = addrspace(2) global [128 x i32] undef, align 4

; Pixel shader performing an atomic add on @gds. The expected gdsSize of 512
; matches the size of the 128 x i32 array (128 * 4 bytes); ieee is expected
; to be false for this calling convention.
; CHECK-LABEL: {{^}}name: ps_shader
; CHECK: machineFunctionInfo:
; CHECK-NEXT: explicitKernArgSize: 0
; CHECK-NEXT: maxKernArgAlign: 4
; CHECK-NEXT: ldsSize: 0
; CHECK-NEXT: gdsSize: 512
; CHECK-NEXT: dynLDSAlign: 1
; CHECK-NEXT: isEntryFunction: true
; CHECK-NEXT: isChainFunction: false
; CHECK-NEXT: noSignedZerosFPMath: false
; CHECK-NEXT: memoryBound: false
; CHECK-NEXT: waveLimiter: false
; CHECK-NEXT: hasSpilledSGPRs: false
; CHECK-NEXT: hasSpilledVGPRs: false
; CHECK-NEXT: scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
; CHECK-NEXT: frameOffsetReg: '$fp_reg'
; CHECK-NEXT: stackPtrOffsetReg: '$sgpr32'
; CHECK-NEXT: bytesInStackArgArea: 0
; CHECK-NEXT: returnsVoid: true
; CHECK-NEXT: argumentInfo:
; CHECK-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr3' }
; CHECK-NEXT: implicitBufferPtr: { reg: '$sgpr0_sgpr1' }
; CHECK-NEXT: psInputAddr: 1
; CHECK-NEXT: psInputEnable: 1
; CHECK-NEXT: maxMemoryClusterDWords: 8
; CHECK-NEXT: mode:
; CHECK-NEXT: ieee: false
; CHECK-NEXT: dx10-clamp: true
; CHECK-NEXT: fp32-input-denormals: true
; CHECK-NEXT: fp32-output-denormals: true
; CHECK-NEXT: fp64-fp16-input-denormals: true
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 10
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: ''
; CHECK-NEXT: hasInitWholeWave: false
; CHECK-NEXT: body:
define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
  %gds.slot = getelementptr inbounds [128 x i32], ptr addrspace(2) @gds, i32 0, i32 %arg0
  atomicrmw add ptr addrspace(2) %gds.slot, i32 8 seq_cst
  ret void
}

; Same body as @ps_shader, but attribute group #7 sets "InitialPSInputAddr"
; to 36983; that value is expected back in psInputAddr while psInputEnable
; stays at 1.
; CHECK-LABEL: {{^}}name: ps_shader_ps_input_enable
; CHECK: machineFunctionInfo:
; CHECK: psInputAddr: 36983
; CHECK-NEXT: psInputEnable: 1{{$}}
define amdgpu_ps void @ps_shader_ps_input_enable(i32 %arg0, i32 inreg %arg1) #7 {
  %gds.slot = getelementptr inbounds [128 x i32], ptr addrspace(2) @gds, i32 0, i32 %arg0
  atomicrmw add ptr addrspace(2) %gds.slot, i32 8 seq_cst
  ret void
}

; Attribute group #5 sets "amdgpu-gds-size" to 4096; the same number is
; expected in gdsSize even though the body never touches @gds.
; CHECK-LABEL: {{^}}name: gds_size_shader
; CHECK: gdsSize: 4096
define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 {
  ret void
}

; Plain (non-entry) function with no attributes. isEntryFunction is expected
; to be false, and the three work-item IDs are expected in $vgpr31 with the
; masks 1023 (0x3ff), 1047552 (0x3ff << 10) and 1072693248 (0x3ff << 20).
; CHECK-LABEL: {{^}}name: function
; CHECK: machineFunctionInfo:
; CHECK-NEXT: explicitKernArgSize: 0
; CHECK-NEXT: maxKernArgAlign: 1
; CHECK-NEXT: ldsSize: 0
; CHECK-NEXT: gdsSize: 0
; CHECK-NEXT: dynLDSAlign: 1
; CHECK-NEXT: isEntryFunction: false
; CHECK-NEXT: isChainFunction: false
; CHECK-NEXT: noSignedZerosFPMath: false
; CHECK-NEXT: memoryBound: false
; CHECK-NEXT: waveLimiter: false
; CHECK-NEXT: hasSpilledSGPRs: false
; CHECK-NEXT: hasSpilledVGPRs: false
; CHECK-NEXT: scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
; CHECK-NEXT: frameOffsetReg: '$sgpr33'
; CHECK-NEXT: stackPtrOffsetReg: '$sgpr32'
; CHECK-NEXT: bytesInStackArgArea: 0
; CHECK-NEXT: returnsVoid: true
; CHECK-NEXT: argumentInfo:
; CHECK-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
; CHECK-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
; CHECK-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
; CHECK-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
; CHECK-NEXT: workGroupIDX: { reg: '$sgpr12' }
; CHECK-NEXT: workGroupIDY: { reg: '$sgpr13' }
; CHECK-NEXT: workGroupIDZ: { reg: '$sgpr14' }
; CHECK-NEXT: LDSKernelId: { reg: '$sgpr15' }
; CHECK-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
; CHECK-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
; CHECK-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
; CHECK-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
; CHECK-NEXT: psInputAddr: 0
; CHECK-NEXT: psInputEnable: 0
; CHECK-NEXT: maxMemoryClusterDWords: 8
; CHECK-NEXT: mode:
; CHECK-NEXT: ieee: true
; CHECK-NEXT: dx10-clamp: true
; CHECK-NEXT: fp32-input-denormals: true
; CHECK-NEXT: fp32-output-denormals: true
; CHECK-NEXT: fp64-fp16-input-denormals: true
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 10
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: ''
; CHECK-NEXT: hasInitWholeWave: false
; CHECK-NEXT: body:
define void @function() {
  ret void
}

; Identical to @function except for attribute group #0
; ("no-signed-zeros-fp-math"="true"); the only expected difference in the
; block below is noSignedZerosFPMath being true.
; CHECK-LABEL: {{^}}name: function_nsz
; CHECK: machineFunctionInfo:
; CHECK-NEXT: explicitKernArgSize: 0
; CHECK-NEXT: maxKernArgAlign: 1
; CHECK-NEXT: ldsSize: 0
; CHECK-NEXT: gdsSize: 0
; CHECK-NEXT: dynLDSAlign: 1
; CHECK-NEXT: isEntryFunction: false
; CHECK-NEXT: isChainFunction: false
; CHECK-NEXT: noSignedZerosFPMath: true
; CHECK-NEXT: memoryBound: false
; CHECK-NEXT: waveLimiter: false
; CHECK-NEXT: hasSpilledSGPRs: false
; CHECK-NEXT: hasSpilledVGPRs: false
; CHECK-NEXT: scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
; CHECK-NEXT: frameOffsetReg: '$sgpr33'
; CHECK-NEXT: stackPtrOffsetReg: '$sgpr32'
; CHECK-NEXT: bytesInStackArgArea: 0
; CHECK-NEXT: returnsVoid: true
; CHECK-NEXT: argumentInfo:
; CHECK-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
; CHECK-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
; CHECK-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
; CHECK-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
; CHECK-NEXT: workGroupIDX: { reg: '$sgpr12' }
; CHECK-NEXT: workGroupIDY: { reg: '$sgpr13' }
; CHECK-NEXT: workGroupIDZ: { reg: '$sgpr14' }
; CHECK-NEXT: LDSKernelId: { reg: '$sgpr15' }
; CHECK-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
; CHECK-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
; CHECK-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
; CHECK-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
; CHECK-NEXT: psInputAddr: 0
; CHECK-NEXT: psInputEnable: 0
; CHECK-NEXT: maxMemoryClusterDWords: 8
; CHECK-NEXT: mode:
; CHECK-NEXT: ieee: true
; CHECK-NEXT: dx10-clamp: true
; CHECK-NEXT: fp32-input-denormals: true
; CHECK-NEXT: fp32-output-denormals: true
; CHECK-NEXT: fp64-fp16-input-denormals: true
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 10
; CHECK-NEXT: vgprForAGPRCopy: ''
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
; CHECK-NEXT: longBranchReservedReg: ''
; CHECK-NEXT: hasInitWholeWave: false
; CHECK-NEXT: body:
define void @function_nsz() #0 {
  ret void
}

; Attribute group #1 turns "amdgpu-dx10-clamp" off; only the mode block is
; matched, with dx10-clamp expected to be false and ieee still true.
; CHECK-LABEL: {{^}}name: function_dx10_clamp_off
; CHECK: mode:
; CHECK-NEXT: ieee: true
; CHECK-NEXT: dx10-clamp: false
; CHECK-NEXT: fp32-input-denormals: true
; CHECK-NEXT: fp32-output-denormals: true
; CHECK-NEXT: fp64-fp16-input-denormals: true
; CHECK-NEXT: fp64-fp16-output-denormals: true
define void @function_dx10_clamp_off() #1 {
  ret void
}

; Attribute group #2 turns "amdgpu-ieee" off; ieee is expected to be false
; and dx10-clamp still true.
; CHECK-LABEL: {{^}}name: function_ieee_off
; CHECK: mode:
; CHECK-NEXT: ieee: false
; CHECK-NEXT: dx10-clamp: true
; CHECK-NEXT: fp32-input-denormals: true
; CHECK-NEXT: fp32-output-denormals: true
; CHECK-NEXT: fp64-fp16-input-denormals: true
; CHECK-NEXT: fp64-fp16-output-denormals: true
define void @function_ieee_off() #2 {
  ret void
}

; Attribute group #3 turns both "amdgpu-dx10-clamp" and "amdgpu-ieee" off;
; both mode bits are expected to be false.
; CHECK-LABEL: {{^}}name: function_ieee_off_dx10_clamp_off
; CHECK: mode:
; CHECK-NEXT: ieee: false
; CHECK-NEXT: dx10-clamp: false
; CHECK-NEXT: fp32-input-denormals: true
; CHECK-NEXT: fp32-output-denormals: true
; CHECK-NEXT: fp64-fp16-input-denormals: true
; CHECK-NEXT: fp64-fp16-output-denormals: true
define void @function_ieee_off_dx10_clamp_off() #3 {
  ret void
}

; Attribute group #4 sets "amdgpu-32bit-address-high-bits" to 0xffff8000,
; which is 4294934528 in decimal, the value expected in
; highBitsOf32BitAddress.
; CHECK-LABEL: {{^}}name: high_address_bits
; CHECK: machineFunctionInfo:
; CHECK: highBitsOf32BitAddress: 4294934528
define amdgpu_ps void @high_address_bits() #4 {
  ret void
}

; Compute shader with two strict.wwm values fed by set.inactive; the
; wwmReservedRegs list is expected to contain exactly $vgpr2 followed by
; $vgpr3 as its first two entries.
; CHECK-LABEL: {{^}}name: wwm_reserved_regs
; CHECK: wwmReservedRegs:
; CHECK-NEXT: - '$vgpr2'
; CHECK-NEXT: - '$vgpr3'
define amdgpu_cs void @wwm_reserved_regs(ptr addrspace(1) %ptr, <4 x i32> inreg %tmp14) {
  %first.load = load volatile i32, ptr addrspace(1) %ptr
  %second.load = load volatile i32, ptr addrspace(1) %ptr
  %second.inactive = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %second.load, i32 0)
  %first.inactive = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %first.load, i32 0)
  %second.wwm = tail call i32 @llvm.amdgcn.strict.wwm.i32(i32 %second.inactive)
  %first.wwm = tail call i32 @llvm.amdgcn.strict.wwm.i32(i32 %first.inactive)
  store volatile i32 %second.wwm, ptr addrspace(1) %ptr
  store volatile i32 %first.wwm, ptr addrspace(1) %ptr
  ret void
}

declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #6
declare i32 @llvm.amdgcn.strict.wwm.i32(i32) #6

; Attribute groups referenced by the functions above.
attributes #0 = { "no-signed-zeros-fp-math"="true" }
attributes #1 = { "amdgpu-dx10-clamp"="false" }
attributes #2 = { "amdgpu-ieee"="false" }
attributes #3 = { "amdgpu-dx10-clamp"="false" "amdgpu-ieee"="false" }
attributes #4 = { "amdgpu-32bit-address-high-bits"="0xffff8000" }
attributes #5 = { "amdgpu-gds-size"="4096" }
attributes #6 = { convergent nounwind readnone willreturn }
attributes #7 = { "InitialPSInputAddr"="36983" }