; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 < %s | FileCheck -check-prefix=GCN %s

; Checks the per-function resource-usage symbols (.set <fn>.num_vgpr, <fn>.numbered_sgpr, ...)
; and the TotalNumSgprs / NumVgprs / ScratchSize values printed for each function below.

; Functions that don't make calls should have constants as their resource usage, since no resource information has to be propagated.

; Leaf function: only clobbers vcc through an empty inline asm.
; GCN-LABEL: {{^}}use_vcc:
; GCN: .set use_vcc.num_vgpr, 0
; GCN: .set use_vcc.num_agpr, 0
; GCN: .set use_vcc.numbered_sgpr, 32
; GCN: .set use_vcc.private_seg_size, 0
; GCN: .set use_vcc.uses_vcc, 1
; GCN: .set use_vcc.uses_flat_scratch, 0
; GCN: .set use_vcc.has_dyn_sized_stack, 0
; GCN: .set use_vcc.has_recursion, 0
; GCN: .set use_vcc.has_indirect_call, 0
; GCN: TotalNumSgprs: 36
; GCN: NumVgprs: 0
; GCN: ScratchSize: 0
define void @use_vcc() #1 {
  call void asm sideeffect "", "~{vcc}" () #0
  ret void
}

; Calls use_vcc: every symbol is an expression combining a local constant with use_vcc's symbol.
; GCN-LABEL: {{^}}indirect_use_vcc:
; GCN: .set indirect_use_vcc.num_vgpr, max(41, use_vcc.num_vgpr)
; GCN: .set indirect_use_vcc.num_agpr, max(0, use_vcc.num_agpr)
; GCN: .set indirect_use_vcc.numbered_sgpr, max(34, use_vcc.numbered_sgpr)
; GCN: .set indirect_use_vcc.private_seg_size, 16+(max(use_vcc.private_seg_size))
; GCN: .set indirect_use_vcc.uses_vcc, or(1, use_vcc.uses_vcc)
; GCN: .set indirect_use_vcc.uses_flat_scratch, or(0, use_vcc.uses_flat_scratch)
; GCN: .set indirect_use_vcc.has_dyn_sized_stack, or(0, use_vcc.has_dyn_sized_stack)
; GCN: .set indirect_use_vcc.has_recursion, or(0, use_vcc.has_recursion)
; GCN: .set indirect_use_vcc.has_indirect_call, or(0, use_vcc.has_indirect_call)
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
define void @indirect_use_vcc() #1 {
  call void @use_vcc()
  ret void
}

; Kernel two call levels above use_vcc: symbols refer to indirect_use_vcc's symbols.
; GCN-LABEL: {{^}}indirect_2level_use_vcc_kernel:
; GCN: .set indirect_2level_use_vcc_kernel.num_vgpr, max(32, indirect_use_vcc.num_vgpr)
; GCN: .set indirect_2level_use_vcc_kernel.num_agpr, max(0, indirect_use_vcc.num_agpr)
; GCN: .set indirect_2level_use_vcc_kernel.numbered_sgpr, max(33, indirect_use_vcc.numbered_sgpr)
; GCN: .set indirect_2level_use_vcc_kernel.private_seg_size, 0+(max(indirect_use_vcc.private_seg_size))
; GCN: .set indirect_2level_use_vcc_kernel.uses_vcc, or(1, indirect_use_vcc.uses_vcc)
; GCN: .set indirect_2level_use_vcc_kernel.uses_flat_scratch, or(1, indirect_use_vcc.uses_flat_scratch)
; GCN: .set indirect_2level_use_vcc_kernel.has_dyn_sized_stack, or(0, indirect_use_vcc.has_dyn_sized_stack)
; GCN: .set indirect_2level_use_vcc_kernel.has_recursion, or(0, indirect_use_vcc.has_recursion)
; GCN: .set indirect_2level_use_vcc_kernel.has_indirect_call, or(0, indirect_use_vcc.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
define amdgpu_kernel void @indirect_2level_use_vcc_kernel(ptr addrspace(1) %out) #0 {
  call void @indirect_use_vcc()
  ret void
}

; Leaf function: only clobbers flat_scratch through an empty inline asm.
; GCN-LABEL: {{^}}use_flat_scratch:
; GCN: .set use_flat_scratch.num_vgpr, 0
; GCN: .set use_flat_scratch.num_agpr, 0
; GCN: .set use_flat_scratch.numbered_sgpr, 32
; GCN: .set use_flat_scratch.private_seg_size, 0
; GCN: .set use_flat_scratch.uses_vcc, 0
; GCN: .set use_flat_scratch.uses_flat_scratch, 1
; GCN: .set use_flat_scratch.has_dyn_sized_stack, 0
; GCN: .set use_flat_scratch.has_recursion, 0
; GCN: .set use_flat_scratch.has_indirect_call, 0
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 0
; GCN: ScratchSize: 0
define void @use_flat_scratch() #1 {
  call void asm sideeffect "", "~{flat_scratch}" () #0
  ret void
}

; Calls use_flat_scratch: symbols are expressions over use_flat_scratch's symbols.
; GCN-LABEL: {{^}}indirect_use_flat_scratch:
; GCN: .set indirect_use_flat_scratch.num_vgpr, max(41, use_flat_scratch.num_vgpr)
; GCN: .set indirect_use_flat_scratch.num_agpr, max(0, use_flat_scratch.num_agpr)
; GCN: .set indirect_use_flat_scratch.numbered_sgpr, max(34, use_flat_scratch.numbered_sgpr)
; GCN: .set indirect_use_flat_scratch.private_seg_size, 16+(max(use_flat_scratch.private_seg_size))
; GCN: .set indirect_use_flat_scratch.uses_vcc, or(1, use_flat_scratch.uses_vcc)
; GCN: .set indirect_use_flat_scratch.uses_flat_scratch, or(0, use_flat_scratch.uses_flat_scratch)
; GCN: .set indirect_use_flat_scratch.has_dyn_sized_stack, or(0, use_flat_scratch.has_dyn_sized_stack)
; GCN: .set indirect_use_flat_scratch.has_recursion, or(0, use_flat_scratch.has_recursion)
; GCN: .set indirect_use_flat_scratch.has_indirect_call, or(0, use_flat_scratch.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
define void @indirect_use_flat_scratch() #1 {
  call void @use_flat_scratch()
  ret void
}

; Kernel two call levels above use_flat_scratch.
; GCN-LABEL: {{^}}indirect_2level_use_flat_scratch_kernel:
; GCN: .set indirect_2level_use_flat_scratch_kernel.num_vgpr, max(32, indirect_use_flat_scratch.num_vgpr)
; GCN: .set indirect_2level_use_flat_scratch_kernel.num_agpr, max(0, indirect_use_flat_scratch.num_agpr)
; GCN: .set indirect_2level_use_flat_scratch_kernel.numbered_sgpr, max(33, indirect_use_flat_scratch.numbered_sgpr)
; GCN: .set indirect_2level_use_flat_scratch_kernel.private_seg_size, 0+(max(indirect_use_flat_scratch.private_seg_size))
; GCN: .set indirect_2level_use_flat_scratch_kernel.uses_vcc, or(1, indirect_use_flat_scratch.uses_vcc)
; GCN: .set indirect_2level_use_flat_scratch_kernel.uses_flat_scratch, or(1, indirect_use_flat_scratch.uses_flat_scratch)
; GCN: .set indirect_2level_use_flat_scratch_kernel.has_dyn_sized_stack, or(0, indirect_use_flat_scratch.has_dyn_sized_stack)
; GCN: .set indirect_2level_use_flat_scratch_kernel.has_recursion, or(0, indirect_use_flat_scratch.has_recursion)
; GCN: .set indirect_2level_use_flat_scratch_kernel.has_indirect_call, or(0, indirect_use_flat_scratch.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
define amdgpu_kernel void @indirect_2level_use_flat_scratch_kernel(ptr addrspace(1) %out) #0 {
  call void @indirect_use_flat_scratch()
  ret void
}

; Leaf function: clobbers v0-v9 through two empty inline asm statements.
; GCN-LABEL: {{^}}use_10_vgpr:
; GCN: .set use_10_vgpr.num_vgpr, 10
; GCN: .set use_10_vgpr.num_agpr, 0
; GCN: .set use_10_vgpr.numbered_sgpr, 32
; GCN: .set use_10_vgpr.private_seg_size, 0
; GCN: .set use_10_vgpr.uses_vcc, 0
; GCN: .set use_10_vgpr.uses_flat_scratch, 0
; GCN: .set use_10_vgpr.has_dyn_sized_stack, 0
; GCN: .set use_10_vgpr.has_recursion, 0
; GCN: .set use_10_vgpr.has_indirect_call, 0
; GCN: TotalNumSgprs: 36
; GCN: NumVgprs: 10
; GCN: ScratchSize: 0
define void @use_10_vgpr() #1 {
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4}"() #0
  call void asm sideeffect "", "~{v5},~{v6},~{v7},~{v8},~{v9}"() #0
  ret void
}

; Calls use_10_vgpr: the caller's own constant (41) exceeds the callee's 10 VGPRs.
; GCN-LABEL: {{^}}indirect_use_10_vgpr:
; GCN: .set indirect_use_10_vgpr.num_vgpr, max(41, use_10_vgpr.num_vgpr)
; GCN: .set indirect_use_10_vgpr.num_agpr, max(0, use_10_vgpr.num_agpr)
; GCN: .set indirect_use_10_vgpr.numbered_sgpr, max(34, use_10_vgpr.numbered_sgpr)
; GCN: .set indirect_use_10_vgpr.private_seg_size, 16+(max(use_10_vgpr.private_seg_size))
; GCN: .set indirect_use_10_vgpr.uses_vcc, or(1, use_10_vgpr.uses_vcc)
; GCN: .set indirect_use_10_vgpr.uses_flat_scratch, or(0, use_10_vgpr.uses_flat_scratch)
; GCN: .set indirect_use_10_vgpr.has_dyn_sized_stack, or(0, use_10_vgpr.has_dyn_sized_stack)
; GCN: .set indirect_use_10_vgpr.has_recursion, or(0, use_10_vgpr.has_recursion)
; GCN: .set indirect_use_10_vgpr.has_indirect_call, or(0, use_10_vgpr.has_indirect_call)
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
define void @indirect_use_10_vgpr() #0 {
  call void @use_10_vgpr()
  ret void
}

; Kernel two call levels above use_10_vgpr.
; GCN-LABEL: {{^}}indirect_2_level_use_10_vgpr:
; GCN: .set indirect_2_level_use_10_vgpr.num_vgpr, max(32, indirect_use_10_vgpr.num_vgpr)
; GCN: .set indirect_2_level_use_10_vgpr.num_agpr, max(0, indirect_use_10_vgpr.num_agpr)
; GCN: .set indirect_2_level_use_10_vgpr.numbered_sgpr, max(33, indirect_use_10_vgpr.numbered_sgpr)
; GCN: .set indirect_2_level_use_10_vgpr.private_seg_size, 0+(max(indirect_use_10_vgpr.private_seg_size))
; GCN: .set indirect_2_level_use_10_vgpr.uses_vcc, or(1, indirect_use_10_vgpr.uses_vcc)
; GCN: .set indirect_2_level_use_10_vgpr.uses_flat_scratch, or(1, indirect_use_10_vgpr.uses_flat_scratch)
; GCN: .set indirect_2_level_use_10_vgpr.has_dyn_sized_stack, or(0, indirect_use_10_vgpr.has_dyn_sized_stack)
; GCN: .set indirect_2_level_use_10_vgpr.has_recursion, or(0, indirect_use_10_vgpr.has_recursion)
; GCN: .set indirect_2_level_use_10_vgpr.has_indirect_call, or(0, indirect_use_10_vgpr.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
define amdgpu_kernel void @indirect_2_level_use_10_vgpr() #0 {
  call void @indirect_use_10_vgpr()
  ret void
}

; Leaf function: clobbers v49 only, which is reported as 50 VGPRs.
; GCN-LABEL: {{^}}use_50_vgpr:
; GCN: .set use_50_vgpr.num_vgpr, 50
; GCN: .set use_50_vgpr.num_agpr, 0
; GCN: .set use_50_vgpr.numbered_sgpr, 32
; GCN: .set use_50_vgpr.private_seg_size, 0
; GCN: .set use_50_vgpr.uses_vcc, 0
; GCN: .set use_50_vgpr.uses_flat_scratch, 0
; GCN: .set use_50_vgpr.has_dyn_sized_stack, 0
; GCN: .set use_50_vgpr.has_recursion, 0
; GCN: .set use_50_vgpr.has_indirect_call, 0
; GCN: TotalNumSgprs: 36
; GCN: NumVgprs: 50
; GCN: ScratchSize: 0
define void @use_50_vgpr() #1 {
  call void asm sideeffect "", "~{v49}"() #0
  ret void
}

; Calls use_50_vgpr: the callee's 50 VGPRs exceed the caller's own constant (41).
; GCN-LABEL: {{^}}indirect_use_50_vgpr:
; GCN: .set indirect_use_50_vgpr.num_vgpr, max(41, use_50_vgpr.num_vgpr)
; GCN: .set indirect_use_50_vgpr.num_agpr, max(0, use_50_vgpr.num_agpr)
; GCN: .set indirect_use_50_vgpr.numbered_sgpr, max(34, use_50_vgpr.numbered_sgpr)
; GCN: .set indirect_use_50_vgpr.private_seg_size, 16+(max(use_50_vgpr.private_seg_size))
; GCN: .set indirect_use_50_vgpr.uses_vcc, or(1, use_50_vgpr.uses_vcc)
; GCN: .set indirect_use_50_vgpr.uses_flat_scratch, or(0, use_50_vgpr.uses_flat_scratch)
; GCN: .set indirect_use_50_vgpr.has_dyn_sized_stack, or(0, use_50_vgpr.has_dyn_sized_stack)
; GCN: .set indirect_use_50_vgpr.has_recursion, or(0, use_50_vgpr.has_recursion)
; GCN: .set indirect_use_50_vgpr.has_indirect_call, or(0, use_50_vgpr.has_indirect_call)
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 50
; GCN: ScratchSize: 16
define void @indirect_use_50_vgpr() #0 {
  call void @use_50_vgpr()
  ret void
}

; Leaf function: clobbers s79 only, which is reported as 80 numbered SGPRs.
; GCN-LABEL: {{^}}use_80_sgpr:
; GCN: .set use_80_sgpr.num_vgpr, 1
; GCN: .set use_80_sgpr.num_agpr, 0
; GCN: .set use_80_sgpr.numbered_sgpr, 80
; GCN: .set use_80_sgpr.private_seg_size, 8
; GCN: .set use_80_sgpr.uses_vcc, 0
; GCN: .set use_80_sgpr.uses_flat_scratch, 0
; GCN: .set use_80_sgpr.has_dyn_sized_stack, 0
; GCN: .set use_80_sgpr.has_recursion, 0
; GCN: .set use_80_sgpr.has_indirect_call, 0
; GCN: TotalNumSgprs: 84
; GCN: NumVgprs: 1
; GCN: ScratchSize: 8
define void @use_80_sgpr() #1 {
  call void asm sideeffect "", "~{s79}"() #0
  ret void
}

; Calls use_80_sgpr: the callee's SGPR count and scratch size feed into the caller's totals.
; GCN-LABEL: {{^}}indirect_use_80_sgpr:
; GCN: .set indirect_use_80_sgpr.num_vgpr, max(41, use_80_sgpr.num_vgpr)
; GCN: .set indirect_use_80_sgpr.num_agpr, max(0, use_80_sgpr.num_agpr)
; GCN: .set indirect_use_80_sgpr.numbered_sgpr, max(34, use_80_sgpr.numbered_sgpr)
; GCN: .set indirect_use_80_sgpr.private_seg_size, 16+(max(use_80_sgpr.private_seg_size))
; GCN: .set indirect_use_80_sgpr.uses_vcc, or(1, use_80_sgpr.uses_vcc)
; GCN: .set indirect_use_80_sgpr.uses_flat_scratch, or(0, use_80_sgpr.uses_flat_scratch)
; GCN: .set indirect_use_80_sgpr.has_dyn_sized_stack, or(0, use_80_sgpr.has_dyn_sized_stack)
; GCN: .set indirect_use_80_sgpr.has_recursion, or(0, use_80_sgpr.has_recursion)
; GCN: .set indirect_use_80_sgpr.has_indirect_call, or(0, use_80_sgpr.has_indirect_call)
; GCN: TotalNumSgprs: 84
; GCN: NumVgprs: 41
; GCN: ScratchSize: 24
define void @indirect_use_80_sgpr() #1 {
  call void @use_80_sgpr()
  ret void
}

; Kernel two call levels above use_80_sgpr.
; GCN-LABEL: {{^}}indirect_2_level_use_80_sgpr:
; GCN: .set indirect_2_level_use_80_sgpr.num_vgpr, max(32, indirect_use_80_sgpr.num_vgpr)
; GCN: .set indirect_2_level_use_80_sgpr.num_agpr, max(0, indirect_use_80_sgpr.num_agpr)
; GCN: .set indirect_2_level_use_80_sgpr.numbered_sgpr, max(33, indirect_use_80_sgpr.numbered_sgpr)
; GCN: .set indirect_2_level_use_80_sgpr.private_seg_size, 0+(max(indirect_use_80_sgpr.private_seg_size))
; GCN: .set indirect_2_level_use_80_sgpr.uses_vcc, or(1, indirect_use_80_sgpr.uses_vcc)
; GCN: .set indirect_2_level_use_80_sgpr.uses_flat_scratch, or(1, indirect_use_80_sgpr.uses_flat_scratch)
; GCN: .set indirect_2_level_use_80_sgpr.has_dyn_sized_stack, or(0, indirect_use_80_sgpr.has_dyn_sized_stack)
; GCN: .set indirect_2_level_use_80_sgpr.has_recursion, or(0, indirect_use_80_sgpr.has_recursion)
; GCN: .set indirect_2_level_use_80_sgpr.has_indirect_call, or(0, indirect_use_80_sgpr.has_indirect_call)
; GCN: TotalNumSgprs: 86
; GCN: NumVgprs: 41
; GCN: ScratchSize: 24
define amdgpu_kernel void @indirect_2_level_use_80_sgpr() #0 {
  call void @indirect_use_80_sgpr()
  ret void
}

; Leaf function with a [512 x i32] private alloca whose address is passed to inline asm.
; GCN-LABEL: {{^}}use_stack0:
; GCN: .set use_stack0.num_vgpr, 1
; GCN: .set use_stack0.num_agpr, 0
; GCN: .set use_stack0.numbered_sgpr, 33
; GCN: .set use_stack0.private_seg_size, 2052
; GCN: .set use_stack0.uses_vcc, 0
; GCN: .set use_stack0.uses_flat_scratch, 0
; GCN: .set use_stack0.has_dyn_sized_stack, 0
; GCN: .set use_stack0.has_recursion, 0
; GCN: .set use_stack0.has_indirect_call, 0
; GCN: TotalNumSgprs: 37
; GCN: NumVgprs: 1
; GCN: ScratchSize: 2052
define void @use_stack0() #1 {
  %alloca = alloca [512 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) #0
  ret void
}

; Leaf function with a [100 x i32] private alloca whose address is passed to inline asm.
; GCN-LABEL: {{^}}use_stack1:
; GCN: .set use_stack1.num_vgpr, 1
; GCN: .set use_stack1.num_agpr, 0
; GCN: .set use_stack1.numbered_sgpr, 33
; GCN: .set use_stack1.private_seg_size, 404
; GCN: .set use_stack1.uses_vcc, 0
; GCN: .set use_stack1.uses_flat_scratch, 0
; GCN: .set use_stack1.has_dyn_sized_stack, 0
; GCN: .set use_stack1.has_recursion, 0
; GCN: .set use_stack1.has_indirect_call, 0
; GCN: TotalNumSgprs: 37
; GCN: NumVgprs: 1
; GCN: ScratchSize: 404
define void @use_stack1() #1 {
  %alloca = alloca [100 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) #0
  ret void
}

; Has its own [16 x i32] alloca and calls use_stack0: the local frame size is added to the callee's.
; GCN-LABEL: {{^}}indirect_use_stack:
; GCN: .set indirect_use_stack.num_vgpr, max(41, use_stack0.num_vgpr)
; GCN: .set indirect_use_stack.num_agpr, max(0, use_stack0.num_agpr)
; GCN: .set indirect_use_stack.numbered_sgpr, max(34, use_stack0.numbered_sgpr)
; GCN: .set indirect_use_stack.private_seg_size, 80+(max(use_stack0.private_seg_size))
; GCN: .set indirect_use_stack.uses_vcc, or(1, use_stack0.uses_vcc)
; GCN: .set indirect_use_stack.uses_flat_scratch, or(0, use_stack0.uses_flat_scratch)
; GCN: .set indirect_use_stack.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack)
; GCN: .set indirect_use_stack.has_recursion, or(0, use_stack0.has_recursion)
; GCN: .set indirect_use_stack.has_indirect_call, or(0, use_stack0.has_indirect_call)
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2132
define void @indirect_use_stack() #1 {
  %alloca = alloca [16 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) #0
  call void @use_stack0()
  ret void
}

; Kernel two call levels above use_stack0.
; GCN-LABEL: {{^}}indirect_2_level_use_stack:
; GCN: .set indirect_2_level_use_stack.num_vgpr, max(32, indirect_use_stack.num_vgpr)
; GCN: .set indirect_2_level_use_stack.num_agpr, max(0, indirect_use_stack.num_agpr)
; GCN: .set indirect_2_level_use_stack.numbered_sgpr, max(33, indirect_use_stack.numbered_sgpr)
; GCN: .set indirect_2_level_use_stack.private_seg_size, 0+(max(indirect_use_stack.private_seg_size))
; GCN: .set indirect_2_level_use_stack.uses_vcc, or(1, indirect_use_stack.uses_vcc)
; GCN: .set indirect_2_level_use_stack.uses_flat_scratch, or(1, indirect_use_stack.uses_flat_scratch)
; GCN: .set indirect_2_level_use_stack.has_dyn_sized_stack, or(0, indirect_use_stack.has_dyn_sized_stack)
; GCN: .set indirect_2_level_use_stack.has_recursion, or(0, indirect_use_stack.has_recursion)
; GCN: .set indirect_2_level_use_stack.has_indirect_call, or(0, indirect_use_stack.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2132
define amdgpu_kernel void @indirect_2_level_use_stack() #0 {
  call void @indirect_use_stack()
  ret void
}


; Should be the maximum of the callees' usage (use_stack0 and use_stack1), not their sum.
; GCN-LABEL: {{^}}multi_call_use_use_stack:
; GCN: .set multi_call_use_use_stack.num_vgpr, max(41, use_stack0.num_vgpr, use_stack1.num_vgpr)
; GCN: .set multi_call_use_use_stack.num_agpr, max(0, use_stack0.num_agpr, use_stack1.num_agpr)
; GCN: .set multi_call_use_use_stack.numbered_sgpr, max(42, use_stack0.numbered_sgpr, use_stack1.numbered_sgpr)
; GCN: .set multi_call_use_use_stack.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size))
; GCN: .set multi_call_use_use_stack.uses_vcc, or(1, use_stack0.uses_vcc, use_stack1.uses_vcc)
; GCN: .set multi_call_use_use_stack.uses_flat_scratch, or(1, use_stack0.uses_flat_scratch, use_stack1.uses_flat_scratch)
; GCN: .set multi_call_use_use_stack.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack, use_stack1.has_dyn_sized_stack)
; GCN: .set multi_call_use_use_stack.has_recursion, or(0, use_stack0.has_recursion, use_stack1.has_recursion)
; GCN: .set multi_call_use_use_stack.has_indirect_call, or(0, use_stack0.has_indirect_call, use_stack1.has_indirect_call)
; GCN: TotalNumSgprs: 48
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2052
define amdgpu_kernel void @multi_call_use_use_stack() #0 {
  call void @use_stack0()
  call void @use_stack1()
  ret void
}

; External declaration with no body in this module.
declare void @external() #0

; Calls two known callees plus @external: register counts are expressed against the
; amdgpu.max_num_* symbols, and the remaining symbols are plain constants.
; GCN-LABEL: {{^}}multi_call_with_external:
; GCN: .set multi_call_with_external.num_vgpr, max(41, amdgpu.max_num_vgpr)
; GCN: .set multi_call_with_external.num_agpr, max(0, amdgpu.max_num_agpr)
; GCN: .set multi_call_with_external.numbered_sgpr, max(42, amdgpu.max_num_sgpr)
; GCN: .set multi_call_with_external.private_seg_size, 0
; GCN: .set multi_call_with_external.uses_vcc, 1
; GCN: .set multi_call_with_external.uses_flat_scratch, 1
; GCN: .set multi_call_with_external.has_dyn_sized_stack, 1
; GCN: .set multi_call_with_external.has_recursion, 0
; GCN: .set multi_call_with_external.has_indirect_call, 1
; GCN: TotalNumSgprs: multi_call_with_external.numbered_sgpr+6
; GCN: NumVgprs: multi_call_with_external.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @multi_call_with_external() #0 {
  call void @use_stack0()
  call void @use_stack1()
  call void @external()
  ret void
}

; Kernel whose only call is to @external.
; GCN-LABEL: {{^}}usage_external:
; GCN: .set usage_external.num_vgpr, max(32, amdgpu.max_num_vgpr)
; GCN: .set usage_external.num_agpr, max(0, amdgpu.max_num_agpr)
; GCN: .set usage_external.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
; GCN: .set usage_external.private_seg_size, 0
; GCN: .set usage_external.uses_vcc, 1
; GCN: .set usage_external.uses_flat_scratch, 1
; GCN: .set usage_external.has_dyn_sized_stack, 1
; GCN: .set usage_external.has_recursion, 0
; GCN: .set usage_external.has_indirect_call, 1
; GCN: TotalNumSgprs: usage_external.numbered_sgpr+6
; GCN: NumVgprs: usage_external.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @usage_external() #0 {
  call void @external()
  ret void
}

; External declaration using attribute group #2, which lacks norecurse.
declare void @external_recurse() #2

; Kernel whose only call is to @external_recurse: has_recursion is expected to be 1 here.
; GCN-LABEL: {{^}}usage_external_recurse:
; GCN: .set usage_external_recurse.num_vgpr, max(32, amdgpu.max_num_vgpr)
; GCN: .set usage_external_recurse.num_agpr, max(0, amdgpu.max_num_agpr)
; GCN: .set usage_external_recurse.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
; GCN: .set usage_external_recurse.private_seg_size, 0
; GCN: .set usage_external_recurse.uses_vcc, 1
; GCN: .set usage_external_recurse.uses_flat_scratch, 1
; GCN: .set usage_external_recurse.has_dyn_sized_stack, 1
; GCN: .set usage_external_recurse.has_recursion, 1
; GCN: .set usage_external_recurse.has_indirect_call, 1
; GCN: TotalNumSgprs: usage_external_recurse.numbered_sgpr+6
; GCN: NumVgprs: usage_external_recurse.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @usage_external_recurse() #0 {
  call void @external_recurse()
  ret void
}

; Self-recursive function with a [512 x i32] alloca: its symbols are constants and
; has_recursion is 1.
; GCN-LABEL: {{^}}direct_recursion_use_stack:
; GCN: .set direct_recursion_use_stack.num_vgpr, 41
; GCN: .set direct_recursion_use_stack.num_agpr, 0
; GCN: .set direct_recursion_use_stack.numbered_sgpr, 36
; GCN: .set direct_recursion_use_stack.private_seg_size, 2064
; GCN: .set direct_recursion_use_stack.uses_vcc, 1
; GCN: .set direct_recursion_use_stack.uses_flat_scratch, 0
; GCN: .set direct_recursion_use_stack.has_dyn_sized_stack, 0
; GCN: .set direct_recursion_use_stack.has_recursion, 1
; GCN: .set direct_recursion_use_stack.has_indirect_call, 0
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2064
define void @direct_recursion_use_stack(i32 %val) #2 {
  %alloca = alloca [512 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) #0
  %cmp = icmp eq i32 %val, 0
  br i1 %cmp, label %ret, label %call

call:
  %val.sub1 = sub i32 %val, 1
  call void @direct_recursion_use_stack(i32 %val.sub1)
  br label %ret

ret:
  ret void
}

; Kernel calling the self-recursive function above.
; GCN-LABEL: {{^}}usage_direct_recursion:
; GCN: .set usage_direct_recursion.num_vgpr, max(32, direct_recursion_use_stack.num_vgpr)
; GCN: .set usage_direct_recursion.num_agpr, max(0, direct_recursion_use_stack.num_agpr)
; GCN: .set usage_direct_recursion.numbered_sgpr, max(33, direct_recursion_use_stack.numbered_sgpr)
; GCN: .set usage_direct_recursion.private_seg_size, 0+(max(direct_recursion_use_stack.private_seg_size))
; GCN: .set usage_direct_recursion.uses_vcc, or(1, direct_recursion_use_stack.uses_vcc)
; GCN: .set usage_direct_recursion.uses_flat_scratch, or(1, direct_recursion_use_stack.uses_flat_scratch)
; GCN: .set usage_direct_recursion.has_dyn_sized_stack, or(0, direct_recursion_use_stack.has_dyn_sized_stack)
; GCN: .set usage_direct_recursion.has_recursion, or(1, direct_recursion_use_stack.has_recursion)
; GCN: .set usage_direct_recursion.has_indirect_call, or(0, direct_recursion_use_stack.has_indirect_call)
; GCN: TotalNumSgprs: 42
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2064
define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
  call void @direct_recursion_use_stack(i32 %n)
  ret void
}

; Make sure there's no assert when a sgpr96 is used.
; The kernel feeds a <3 x i32> constant to inline asm through an "s" constraint, then calls @external.
; GCN-LABEL: {{^}}count_use_sgpr96_external_call:
; GCN: .set count_use_sgpr96_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
; GCN: .set count_use_sgpr96_external_call.num_agpr, max(0, amdgpu.max_num_agpr)
; GCN: .set count_use_sgpr96_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
; GCN: .set count_use_sgpr96_external_call.private_seg_size, 0
; GCN: .set count_use_sgpr96_external_call.uses_vcc, 1
; GCN: .set count_use_sgpr96_external_call.uses_flat_scratch, 1
; GCN: .set count_use_sgpr96_external_call.has_dyn_sized_stack, 1
; GCN: .set count_use_sgpr96_external_call.has_recursion, 0
; GCN: .set count_use_sgpr96_external_call.has_indirect_call, 1
; GCN: TotalNumSgprs: count_use_sgpr96_external_call.numbered_sgpr+6
; GCN: NumVgprs: count_use_sgpr96_external_call.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @count_use_sgpr96_external_call() {
entry:
  tail call void asm sideeffect "; sgpr96 $0", "s"(<3 x i32> <i32 10, i32 11, i32 12>) #1
  call void @external()
  ret void
}

; Make sure there's no assert when a sgpr160 is used.
; The kernel feeds a <5 x i32> constant to inline asm through an "s" constraint, then calls @external.
; GCN-LABEL: {{^}}count_use_sgpr160_external_call:
; GCN: .set count_use_sgpr160_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
; GCN: .set count_use_sgpr160_external_call.num_agpr, max(0, amdgpu.max_num_agpr)
; GCN: .set count_use_sgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
; GCN: .set count_use_sgpr160_external_call.private_seg_size, 0
; GCN: .set count_use_sgpr160_external_call.uses_vcc, 1
; GCN: .set count_use_sgpr160_external_call.uses_flat_scratch, 1
; GCN: .set count_use_sgpr160_external_call.has_dyn_sized_stack, 1
; GCN: .set count_use_sgpr160_external_call.has_recursion, 0
; GCN: .set count_use_sgpr160_external_call.has_indirect_call, 1
; GCN: TotalNumSgprs: count_use_sgpr160_external_call.numbered_sgpr+6
; GCN: NumVgprs: count_use_sgpr160_external_call.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @count_use_sgpr160_external_call() {
entry:
  tail call void asm sideeffect "; sgpr160 $0", "s"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
  call void @external()
  ret void
}

; Make sure there's no assert when a vgpr160 is used.
; The kernel feeds a <5 x i32> constant to inline asm through a "v" constraint, then calls @external.
; GCN-LABEL: {{^}}count_use_vgpr160_external_call:
; GCN: .set count_use_vgpr160_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
; GCN: .set count_use_vgpr160_external_call.num_agpr, max(0, amdgpu.max_num_agpr)
; GCN: .set count_use_vgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
; GCN: .set count_use_vgpr160_external_call.private_seg_size, 0
; GCN: .set count_use_vgpr160_external_call.uses_vcc, 1
; GCN: .set count_use_vgpr160_external_call.uses_flat_scratch, 1
; GCN: .set count_use_vgpr160_external_call.has_dyn_sized_stack, 1
; GCN: .set count_use_vgpr160_external_call.has_recursion, 0
; GCN: .set count_use_vgpr160_external_call.has_indirect_call, 1
; GCN: TotalNumSgprs: count_use_vgpr160_external_call.numbered_sgpr+6
; GCN: NumVgprs: count_use_vgpr160_external_call.num_vgpr
; GCN: ScratchSize: 0
define amdgpu_kernel void @count_use_vgpr160_external_call() {
entry:
  tail call void asm sideeffect "; vgpr160 $0", "v"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
  call void @external()
  ret void
}

; Appended at the end of the .s are the module-level maximums. The expected values equal the
; largest per-function constants checked in this file (50 VGPRs, 0 AGPRs, 80 SGPRs).
; GCN: .set amdgpu.max_num_vgpr, 50
; GCN: .set amdgpu.max_num_agpr, 0
; GCN: .set amdgpu.max_num_sgpr, 80

; Attribute groups #0 and #1 are identical; #2 differs only by omitting norecurse.
attributes #0 = { nounwind noinline norecurse }
attributes #1 = { nounwind noinline norecurse }
attributes #2 = { nounwind noinline }