; RUN: opt -passes=amdgpu-attributor -mcpu=kaveri < %s | llc -enable-ipra=0 | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
; RUN: opt -passes=amdgpu-attributor -mcpu=gfx900 < %s | llc -enable-ipra=0 | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

target triple = "amdgcn-amd-amdhsa"

; GCN-LABEL: {{^}}use_dispatch_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[4:5]
define hidden void @use_dispatch_ptr() #1 {
  %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
  %value = load volatile i32, ptr addrspace(4) %dispatch_ptr
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_dispatch_ptr:
; GCN-NOT: s[4:5]
; GCN-NOT: s4
; GCN-NOT: s5
; GCN: .amdhsa_user_sgpr_dispatch_ptr 1
define amdgpu_kernel void @kern_indirect_use_dispatch_ptr(i32) #1 {
  call void @use_dispatch_ptr()
  ret void
}

; GCN-LABEL: {{^}}use_queue_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[6:7]
define hidden void @use_queue_ptr() #1 {
  %queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
  %value = load volatile i32, ptr addrspace(4) %queue_ptr
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr:
; GCN: s_swappc_b64 s[30:31], s[10:11]
; GCN: .amdhsa_user_sgpr_queue_ptr 1
define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 {
  call void @use_queue_ptr()
  ret void
}

; GCN-LABEL: {{^}}use_queue_ptr_addrspacecast:
; CIVI: s_load_dword [[APERTURE_LOAD:s[0-9]+]], s[4:5], 0x0
; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16
; CIVI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]

; GFX9: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
; GFX9-DAG: v_mov_b32_e32 v[[VGPR_HI:[0-9]+]], s[[HI]]
; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[VGPR_HI]]]

; CIVI: {{flat|global}}_store_dword v[[[LO]]:[[HI]]]
define hidden void @use_queue_ptr_addrspacecast() #1 {
  %asc = addrspacecast ptr addrspace(3) inttoptr (i32 16 to ptr addrspace(3)) to ptr
  store volatile i32 0, ptr %asc
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr_addrspacecast:
; CIVI: s_swappc_b64 s[30:31], s[4:5]
; CIVI: .amdhsa_user_sgpr_queue_ptr 0

; GFX9-NOT: s_mov_b64 s[6:7]
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 {
  call void @use_queue_ptr_addrspacecast()
  ret void
}

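; Callee ABI summary (as exercised by the checks in this file): callable
; functions expect the dispatch ptr in s[4:5], the queue ptr in s[6:7], the
; implicit arg ptr in s[8:9], the dispatch id in s[10:11], and the workgroup
; IDs X/Y/Z in s12, s13 and s14. The kernarg segment pointer is not forwarded
; to callees and folds to a null pointer below.
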
; The kernarg segment pointer is not really supported in callable functions.
; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
; GCN: s_mov_b64 [[PTR:s\[[0-9]+:[0-9]+\]]], 0
; GCN: s_load_dword s{{[0-9]+}}, [[PTR]], 0x0
define hidden void @use_kernarg_segment_ptr() #1 {
  %kernarg_segment_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
  %value = load volatile i32, ptr addrspace(4) %kernarg_segment_ptr
  ret void
}

; GCN-LABEL: {{^}}use_implicitarg_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[8:9]
define hidden void @use_implicitarg_ptr() #1 {
  %implicit.arg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
  %value = load volatile i32, ptr addrspace(4) %implicit.arg.ptr
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_kernarg_segment_ptr:
; GCN: .amdhsa_user_sgpr_kernarg_segment_ptr 1
define amdgpu_kernel void @kern_indirect_use_kernarg_segment_ptr(i32) #1 {
  call void @use_kernarg_segment_ptr()
  ret void
}

; GCN-LABEL: {{^}}use_dispatch_id:
; GCN: ; use s[10:11]
define hidden void @use_dispatch_id() #1 {
  %id = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %id)
  ret void
}

; There is no kernarg segment, so there is a mov to check. With the kernarg
; pointer enabled, it happens to end up in the right place anyway.

; GCN-LABEL: {{^}}kern_indirect_use_dispatch_id:
; GCN: s_mov_b64 s[10:11], s[4:5]
; GCN: .amdhsa_user_sgpr_dispatch_id 1
define amdgpu_kernel void @kern_indirect_use_dispatch_id() #1 {
  call void @use_dispatch_id()
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_x:
; GCN: s_waitcnt
; GCN: ; use s12
define hidden void @use_workgroup_id_x() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_stack_workgroup_id_x:
; GCN: s_waitcnt
; GCN-NOT: s32
; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}}
; GCN: ; use s12
; GCN: s_setpc_b64
define hidden void @use_stack_workgroup_id_x() #1 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, ptr addrspace(5) %alloca
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_y:
; GCN: s_waitcnt
; GCN: ; use s13
define hidden void @use_workgroup_id_y() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_z:
; GCN: s_waitcnt
; GCN: ; use s14
define hidden void @use_workgroup_id_z() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_xy:
; GCN: ; use s12
; GCN: ; use s13
define hidden void @use_workgroup_id_xy() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_xyz:
; GCN: ; use s12
; GCN: ; use s13
; GCN: ; use s14
define hidden void @use_workgroup_id_xyz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  call void asm sideeffect "; use $0", "s"(i32 %val2)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_xz:
; GCN: ; use s12
; GCN: ; use s14
define hidden void @use_workgroup_id_xz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_yz:
; GCN: ; use s13
; GCN: ; use s14
define hidden void @use_workgroup_id_yz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_x:
; GCN-NOT: s6
; GCN: s_mov_b32 s12, s6
; GCN: s_mov_b32 s32, 0
; GCN: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, use_workgroup_id_x@rel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, use_workgroup_id_x@rel32@hi+12
; GCN: s_swappc_b64
; GCN-NEXT: s_endpgm

; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 0
; GCN: .amdhsa_system_sgpr_workgroup_id_z 0
define amdgpu_kernel void @kern_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_y:
; GCN-NOT: s12
; GCN: s_mov_b32 s13, s7
; GCN-NOT: s12
; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
; GCN: .amdhsa_system_sgpr_workgroup_id_z 0
define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_z:
; GCN-NOT: s12
; GCN-NOT: s13
; GCN: s_mov_b32 s14, s7
; GCN-NOT: s12
; GCN-NOT: s13

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 0
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xy:
; GCN-NOT: s14
; GCN: s_mov_b32 s12, s6
; GCN-NEXT: s_mov_b32 s13, s7
; GCN-NOT: s14

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
; GCN: .amdhsa_system_sgpr_workgroup_id_z 0
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xy() #1 {
  call void @use_workgroup_id_xy()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xyz:
; GCN: s_mov_b32 s12, s6
; GCN: s_mov_b32 s13, s7
; GCN: s_mov_b32 s14, s8
; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xyz() #1 {
  call void @use_workgroup_id_xyz()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xz:

; GCN-NOT: s13
; GCN: s_mov_b32 s12, s6
; GCN-NEXT: s_mov_b32 s14, s7
; GCN-NOT: s13

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 0
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xz() #1 {
  call void @use_workgroup_id_xz()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_yz:

; GCN: s_mov_b32 s13, s7
; GCN: s_mov_b32 s14, s8

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 {
  call void @use_workgroup_id_yz()
  ret void
}

; Argument is in the right place already.
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x:
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s14
; GCN: v_readlane_b32 s30, v40, 0
define hidden void @func_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}

; Argument is in the right place already. We are free to clobber other
; SGPR arguments.
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y:
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s14
define hidden void @func_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z:
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s14
define hidden void @func_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}

; GCN-LABEL: {{^}}other_arg_use_workgroup_id_x:
; CIVI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0, off
; GCN: ; use s12
define hidden void @other_arg_use_workgroup_id_x(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %arg0, ptr addrspace(1) undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}other_arg_use_workgroup_id_y:
; CIVI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0, off
; GCN: ; use s13
define hidden void @other_arg_use_workgroup_id_y(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %arg0, ptr addrspace(1) undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}other_arg_use_workgroup_id_z:
; CIVI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0, off
; GCN: ; use s14
define hidden void @other_arg_use_workgroup_id_z(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %arg0, ptr addrspace(1) undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_x:

; GCN-NOT: s13
; GCN-NOT: s14
; GCN-DAG: s_mov_b32 s12, s6
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-NOT: s13
; GCN-NOT: s14

; GCN-DAG: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 0
; GCN: .amdhsa_system_sgpr_workgroup_id_z 0
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() #1 {
  call void @other_arg_use_workgroup_id_x(i32 555)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_y:
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-DAG: s_mov_b32 s13, s7

; GCN-DAG: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
; GCN: .amdhsa_system_sgpr_workgroup_id_z 0
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_y() #1 {
  call void @other_arg_use_workgroup_id_y(i32 555)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_z:
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-DAG: s_mov_b32 s14, s7

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 0
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_z() #1 {
  call void @other_arg_use_workgroup_id_z(i32 555)
  ret void
}

; GCN-LABEL: {{^}}use_every_sgpr_input:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32{{$}}
; GCN: s_load_dword s{{[0-9]+}}, s[4:5]
; GCN: s_load_dword s{{[0-9]+}}, s[6:7]
; GCN: s_load_dword s{{[0-9]+}}, s[8:9]
; GCN: ; use s[10:11]
; GCN: ; use s12
; GCN: ; use s13
; GCN: ; use s14
define hidden void @use_every_sgpr_input() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 0, ptr addrspace(5) %alloca

  %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
  %val0 = load volatile i32, ptr addrspace(4) %dispatch_ptr

  %queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
  %val1 = load volatile i32, ptr addrspace(4) %queue_ptr

  %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
  %val2 = load volatile i32, ptr addrspace(4) %implicitarg.ptr

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input:
; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_user_sgpr_private_segment_buffer 1
; GCN: .amdhsa_user_sgpr_dispatch_ptr 1
; GCN: .amdhsa_user_sgpr_queue_ptr 1
; GCN: .amdhsa_user_sgpr_kernarg_segment_ptr 1
; GCN: .amdhsa_user_sgpr_dispatch_id 1
; GCN: .amdhsa_user_sgpr_flat_scratch_init 1
; GCN: .amdhsa_user_sgpr_private_segment_size 0
; GCN: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
; GCN: .amdhsa_system_sgpr_workgroup_info 0
; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_indirect_use_every_sgpr_input(i8) #1 {
  call void @use_every_sgpr_input()
  ret void
}

; We have to pass the kernarg segment, but there are no kernel
; arguments so null is passed.
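; Note: the kernel below has no formal arguments and uses attribute #2
; ("amdgpu-implicitarg-num-bytes"="0"), so the kernarg segment pointer user
; SGPR is not enabled (.amdhsa_user_sgpr_kernarg_segment_ptr 0).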
; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input_no_kernargs:
; GCN: s_mov_b64 s[10:11], s[8:9]
; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_user_sgpr_private_segment_buffer 1
; GCN: .amdhsa_user_sgpr_dispatch_ptr 1
; GCN: .amdhsa_user_sgpr_queue_ptr 1
; GCN: .amdhsa_user_sgpr_kernarg_segment_ptr 0
; GCN: .amdhsa_user_sgpr_dispatch_id 1
; GCN: .amdhsa_user_sgpr_flat_scratch_init 1
; GCN: .amdhsa_user_sgpr_private_segment_size 0
; GCN: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
; GCN: .amdhsa_system_sgpr_workgroup_info 0
; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_indirect_use_every_sgpr_input_no_kernargs() #2 {
  call void @use_every_sgpr_input()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_every_sgpr_input:
; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8
; GCN-NOT: s9
; GCN-NOT: s10
; GCN-NOT: s11
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s[6:7]
; GCN-NOT: s[8:9]
; GCN-NOT: s[10:11]
; GCN-NOT: s[12:13]
; GCN-NOT: s14
; GCN: s_or_saveexec_b64 s[16:17], -1
define hidden void @func_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}

; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz:
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s14
; GCN: ; use s[10:11]
; GCN: ; use s12
; GCN: ; use s13
; GCN: ; use s14

; GCN: s_swappc_b64
define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 0, ptr addrspace(5) %alloca

  %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
  %val0 = load volatile i32, ptr addrspace(4) %dispatch_ptr

  %queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
  %val1 = load volatile i32, ptr addrspace(4) %queue_ptr

  %kernarg_segment_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
  %val2 = load volatile i32, ptr addrspace(4) %kernarg_segment_ptr

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  call void @use_workgroup_id_xyz()
  ret void
}

declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0
declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
declare noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
declare noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
declare i64 @llvm.amdgcn.dispatch.id() #0
declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind noinline }
attributes #2 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="0" }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}