; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-GISEL %s

; Test with gfx803 so that addrspacecast, llvm.amdgcn.is.shared and
; llvm.amdgcn.is.private require the queue ptr. Test with code object
; v3 and above to cover llvm.trap/llvm.debugtrap, which require the
; queue ptr. (The module flag at the end of this file requests
; amdhsa_code_object_version 500.)
;
; Every function defined below carries attribute group #0, which claims
; that none of the listed implicit inputs are needed.


declare hidden void @requires_all_inputs()

; This function is incorrectly marked with the hints that the callee
; does not require the implicit arguments to the function. Make sure
; we do not crash.
define void @parent_func_missing_inputs() #0 {
; FIXEDABI-LABEL: parent_func_missing_inputs:
; FIXEDABI:       ; %bb.0:
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FIXEDABI-NEXT:    s_mov_b32 s16, s33
; FIXEDABI-NEXT:    s_mov_b32 s33, s32
; FIXEDABI-NEXT:    s_or_saveexec_b64 s[18:19], -1
; FIXEDABI-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; FIXEDABI-NEXT:    s_mov_b64 exec, s[18:19]
; FIXEDABI-NEXT:    v_writelane_b32 v40, s16, 2
; FIXEDABI-NEXT:    s_addk_i32 s32, 0x400
; FIXEDABI-NEXT:    v_writelane_b32 v40, s30, 0
; FIXEDABI-NEXT:    v_writelane_b32 v40, s31, 1
; FIXEDABI-NEXT:    s_getpc_b64 s[16:17]
; FIXEDABI-NEXT:    s_add_u32 s16, s16, requires_all_inputs@rel32@lo+4
; FIXEDABI-NEXT:    s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12
; FIXEDABI-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; FIXEDABI-NEXT:    v_readlane_b32 s31, v40, 1
; FIXEDABI-NEXT:    v_readlane_b32 s30, v40, 0
; FIXEDABI-NEXT:    s_mov_b32 s32, s33
; FIXEDABI-NEXT:    v_readlane_b32 s4, v40, 2
; FIXEDABI-NEXT:    s_or_saveexec_b64 s[6:7], -1
; FIXEDABI-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; FIXEDABI-NEXT:    s_mov_b64 exec, s[6:7]
; FIXEDABI-NEXT:    s_mov_b32 s33, s4
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-NEXT:    s_setpc_b64 s[30:31]
  call void @requires_all_inputs()
  ret void
}

; Kernel counterpart of @parent_func_missing_inputs: an amdgpu_kernel
; carrying the same #0 hints that calls @requires_all_inputs.
define amdgpu_kernel void @parent_kernel_missing_inputs() #0 {
; FIXEDABI-SDAG-LABEL: parent_kernel_missing_inputs:
; FIXEDABI-SDAG:       ; %bb.0:
; FIXEDABI-SDAG-NEXT:    s_add_i32 s4, s4, s9
; FIXEDABI-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
; FIXEDABI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
; FIXEDABI-SDAG-NEXT:    s_add_u32 s0, s0, s9
; FIXEDABI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
; FIXEDABI-SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
; FIXEDABI-SDAG-NEXT:    s_addc_u32 s1, s1, 0
; FIXEDABI-SDAG-NEXT:    s_mov_b32 s14, s8
; FIXEDABI-SDAG-NEXT:    v_or_b32_e32 v31, v0, v2
; FIXEDABI-SDAG-NEXT:    s_mov_b64 s[8:9], 0
; FIXEDABI-SDAG-NEXT:    s_mov_b32 s12, s6
; FIXEDABI-SDAG-NEXT:    s_mov_b32 s13, s7
; FIXEDABI-SDAG-NEXT:    s_mov_b32 s32, 0
; FIXEDABI-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s5
; FIXEDABI-SDAG-NEXT:    s_getpc_b64 s[4:5]
; FIXEDABI-SDAG-NEXT:    s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
; FIXEDABI-SDAG-NEXT:    s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12
; FIXEDABI-SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; FIXEDABI-SDAG-NEXT:    s_endpgm
;
; FIXEDABI-GISEL-LABEL: parent_kernel_missing_inputs:
; FIXEDABI-GISEL:       ; %bb.0:
; FIXEDABI-GISEL-NEXT:    s_add_i32 s4, s4, s9
; FIXEDABI-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
; FIXEDABI-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
; FIXEDABI-GISEL-NEXT:    s_add_u32 s0, s0, s9
; FIXEDABI-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; FIXEDABI-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 20, v2
; FIXEDABI-GISEL-NEXT:    s_addc_u32 s1, s1, 0
; FIXEDABI-GISEL-NEXT:    s_mov_b32 s14, s8
; FIXEDABI-GISEL-NEXT:    v_or_b32_e32 v31, v0, v1
; FIXEDABI-GISEL-NEXT:    s_mov_b64 s[8:9], 0
; FIXEDABI-GISEL-NEXT:    s_mov_b32 s12, s6
; FIXEDABI-GISEL-NEXT:    s_mov_b32 s13, s7
; FIXEDABI-GISEL-NEXT:    s_mov_b32 s32, 0
; FIXEDABI-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s5
; FIXEDABI-GISEL-NEXT:    s_getpc_b64 s[4:5]
; FIXEDABI-GISEL-NEXT:    s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
; FIXEDABI-GISEL-NEXT:    s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12
; FIXEDABI-GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; FIXEDABI-GISEL-NEXT:    s_endpgm
  call void @requires_all_inputs()
  ret void
}

; Function is marked with amdgpu-no-work-item-id-* but uses them anyway.
; All three workitem IDs are read and stored (volatile) through %ptr.
define void @marked_func_use_workitem_id(ptr addrspace(1) %ptr) #0 {
; FIXEDABI-SDAG-LABEL: marked_func_use_workitem_id:
; FIXEDABI-SDAG:       ; %bb.0:
; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FIXEDABI-SDAG-NEXT:    v_and_b32_e32 v2, 0x3ff, v31
; FIXEDABI-SDAG-NEXT:    flat_store_dword v[0:1], v2
; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-SDAG-NEXT:    v_bfe_u32 v2, v31, 10, 10
; FIXEDABI-SDAG-NEXT:    flat_store_dword v[0:1], v2
; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-SDAG-NEXT:    v_bfe_u32 v2, v31, 20, 10
; FIXEDABI-SDAG-NEXT:    flat_store_dword v[0:1], v2
; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; FIXEDABI-GISEL-LABEL: marked_func_use_workitem_id:
; FIXEDABI-GISEL:       ; %bb.0:
; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FIXEDABI-GISEL-NEXT:    v_and_b32_e32 v2, 0x3ff, v31
; FIXEDABI-GISEL-NEXT:    v_bfe_u32 v3, v31, 10, 10
; FIXEDABI-GISEL-NEXT:    v_bfe_u32 v4, v31, 20, 10
; FIXEDABI-GISEL-NEXT:    flat_store_dword v[0:1], v2
; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-GISEL-NEXT:    flat_store_dword v[0:1], v3
; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-GISEL-NEXT:    flat_store_dword v[0:1], v4
; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-GISEL-NEXT:    s_setpc_b64 s[30:31]
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %id.y = call i32 @llvm.amdgcn.workitem.id.y()
  %id.z = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %id.x, ptr addrspace(1) %ptr
  store volatile i32 %id.y, ptr addrspace(1) %ptr
  store volatile i32 %id.z, ptr addrspace(1) %ptr
  ret void
}

; Kernel is marked with amdgpu-no-work-item-id-* but uses them anyway.
define amdgpu_kernel void @marked_kernel_use_workitem_id(ptr addrspace(1) %ptr) #0 {
; FIXEDABI-LABEL: marked_kernel_use_workitem_id:
; FIXEDABI:       ; %bb.0:
; FIXEDABI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; FIXEDABI-NEXT:    s_waitcnt lgkmcnt(0)
; FIXEDABI-NEXT:    v_mov_b32_e32 v4, s1
; FIXEDABI-NEXT:    v_mov_b32_e32 v3, s0
; FIXEDABI-NEXT:    flat_store_dword v[3:4], v0
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-NEXT:    flat_store_dword v[3:4], v1
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-NEXT:    flat_store_dword v[3:4], v2
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-NEXT:    s_endpgm
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %id.y = call i32 @llvm.amdgcn.workitem.id.y()
  %id.z = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %id.x, ptr addrspace(1) %ptr
  store volatile i32 %id.y, ptr addrspace(1) %ptr
  store volatile i32 %id.z, ptr addrspace(1) %ptr
  ret void
}

; Function is marked with amdgpu-no-work-group-id-* but reads all three
; workgroup IDs anyway and stores them (volatile) through %ptr.
define void @marked_func_use_workgroup_id(ptr addrspace(1) %ptr) #0 {
; FIXEDABI-LABEL: marked_func_use_workgroup_id:
; FIXEDABI:       ; %bb.0:
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s12
; FIXEDABI-NEXT:    flat_store_dword v[0:1], v2
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s13
; FIXEDABI-NEXT:    flat_store_dword v[0:1], v2
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s14
; FIXEDABI-NEXT:    flat_store_dword v[0:1], v2
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-NEXT:    s_setpc_b64 s[30:31]
  %id.x = call i32 @llvm.amdgcn.workgroup.id.x()
  %id.y = call i32 @llvm.amdgcn.workgroup.id.y()
  %id.z = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %id.x, ptr addrspace(1) %ptr
  store volatile i32 %id.y, ptr addrspace(1) %ptr
  store volatile i32 %id.z, ptr addrspace(1) %ptr
  ret void
}

; Kernel is marked with amdgpu-no-work-group-id-* but uses them anyway.
define amdgpu_kernel void @marked_kernel_use_workgroup_id(ptr addrspace(1) %ptr) #0 {
; FIXEDABI-LABEL: marked_kernel_use_workgroup_id:
; FIXEDABI:       ; %bb.0:
; FIXEDABI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s6
; FIXEDABI-NEXT:    s_waitcnt lgkmcnt(0)
; FIXEDABI-NEXT:    v_mov_b32_e32 v0, s0
; FIXEDABI-NEXT:    v_mov_b32_e32 v1, s1
; FIXEDABI-NEXT:    flat_store_dword v[0:1], v2
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s7
; FIXEDABI-NEXT:    flat_store_dword v[0:1], v2
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s8
; FIXEDABI-NEXT:    flat_store_dword v[0:1], v2
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-NEXT:    s_endpgm
  %id.x = call i32 @llvm.amdgcn.workgroup.id.x()
  %id.y = call i32 @llvm.amdgcn.workgroup.id.y()
  %id.z = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %id.x, ptr addrspace(1) %ptr
  store volatile i32 %id.y, ptr addrspace(1) %ptr
  store volatile i32 %id.z, ptr addrspace(1) %ptr
  ret void
}

; Function is marked with amdgpu-no-queue-ptr, amdgpu-no-implicitarg-ptr,
; amdgpu-no-dispatch-ptr and amdgpu-no-dispatch-id but calls all four
; corresponding intrinsics anyway. The three pointers are loaded from
; (volatile) and the dispatch id is stored through %ptr.
define void @marked_func_use_other_sgpr(ptr addrspace(1) %ptr) #0 {
; FIXEDABI-LABEL: marked_func_use_other_sgpr:
; FIXEDABI:       ; %bb.0:
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s6
; FIXEDABI-NEXT:    v_mov_b32_e32 v3, s7
; FIXEDABI-NEXT:    flat_load_ubyte v2, v[2:3] glc
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s8
; FIXEDABI-NEXT:    v_mov_b32_e32 v3, s9
; FIXEDABI-NEXT:    flat_load_ubyte v2, v[2:3] glc
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s4
; FIXEDABI-NEXT:    v_mov_b32_e32 v3, s5
; FIXEDABI-NEXT:    flat_load_ubyte v2, v[2:3] glc
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s10
; FIXEDABI-NEXT:    v_mov_b32_e32 v3, s11
; FIXEDABI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-NEXT:    s_setpc_b64 s[30:31]
  %queue.ptr = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %dispatch.ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
  %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
  %queue.load = load volatile i8, ptr addrspace(4) %queue.ptr
  %implicitarg.load = load volatile i8, ptr addrspace(4) %implicitarg.ptr
  %dispatch.load = load volatile i8, ptr addrspace(4) %dispatch.ptr
  store volatile i64 %dispatch.id, ptr addrspace(1) %ptr
  ret void
}

; Kernel counterpart of @marked_func_use_other_sgpr. Unlike the function
; version, the %dispatch.id result is never stored here and %ptr is unused.
define amdgpu_kernel void @marked_kernel_use_other_sgpr(ptr addrspace(1) %ptr) #0 {
; FIXEDABI-LABEL: marked_kernel_use_other_sgpr:
; FIXEDABI:       ; %bb.0:
; FIXEDABI-NEXT:    s_add_u32 s0, s4, 8
; FIXEDABI-NEXT:    flat_load_ubyte v0, v[0:1] glc
; FIXEDABI-NEXT:    s_addc_u32 s1, s5, 0
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-NEXT:    v_mov_b32_e32 v0, s0
; FIXEDABI-NEXT:    v_mov_b32_e32 v1, s1
; FIXEDABI-NEXT:    flat_load_ubyte v0, v[0:1] glc
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-NEXT:    flat_load_ubyte v0, v[0:1] glc
; FIXEDABI-NEXT:    s_endpgm
  %queue.ptr = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %dispatch.ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
  %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
  %queue.load = load volatile i8, ptr addrspace(4) %queue.ptr
  %implicitarg.load = load volatile i8, ptr addrspace(4) %implicitarg.ptr
  %dispatch.load = load volatile i8, ptr addrspace(4) %dispatch.ptr
  ret void
}

; Kernel with no explicit arguments that is marked amdgpu-no-implicitarg-ptr
; but still loads through llvm.amdgcn.implicitarg.ptr.
define amdgpu_kernel void @marked_kernel_nokernargs_implicitarg_ptr() #0 {
; FIXEDABI-LABEL: marked_kernel_nokernargs_implicitarg_ptr:
; FIXEDABI:       ; %bb.0:
; FIXEDABI-NEXT:    v_mov_b32_e32 v0, 0
; FIXEDABI-NEXT:    v_mov_b32_e32 v1, 0
; FIXEDABI-NEXT:    flat_load_ubyte v0, v[0:1] glc
; FIXEDABI-NEXT:    s_endpgm
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %implicitarg.load = load volatile i8, ptr addrspace(4) %implicitarg.ptr
  ret void
}

; On gfx8, the queue ptr is required for this addrspacecast, yet the
; function is marked amdgpu-no-queue-ptr. Both a private (addrspace 5)
; and a local (addrspace 3) pointer are cast to flat and stored through.
define void @addrspacecast_requires_queue_ptr(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) #0 {
; FIXEDABI-SDAG-LABEL: addrspacecast_requires_queue_ptr:
; FIXEDABI-SDAG:       ; %bb.0:
; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FIXEDABI-SDAG-NEXT:    s_mov_b64 s[4:5], 0xc0
; FIXEDABI-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x0
; FIXEDABI-SDAG-NEXT:    s_mov_b64 s[4:5], 0xc4
; FIXEDABI-SDAG-NEXT:    s_load_dword s4, s[4:5], 0x0
; FIXEDABI-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; FIXEDABI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; FIXEDABI-SDAG-NEXT:    v_mov_b32_e32 v2, s6
; FIXEDABI-SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v2, vcc
; FIXEDABI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v0, vcc
; FIXEDABI-SDAG-NEXT:    v_mov_b32_e32 v0, s4
; FIXEDABI-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
; FIXEDABI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v0, vcc
; FIXEDABI-SDAG-NEXT:    v_mov_b32_e32 v0, 1
; FIXEDABI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v1, vcc
; FIXEDABI-SDAG-NEXT:    flat_store_dword v[2:3], v0
; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-SDAG-NEXT:    v_mov_b32_e32 v0, 2
; FIXEDABI-SDAG-NEXT:    flat_store_dword v[4:5], v0
; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; FIXEDABI-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; FIXEDABI-GISEL-LABEL: addrspacecast_requires_queue_ptr:
; FIXEDABI-GISEL:       ; %bb.0:
; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FIXEDABI-GISEL-NEXT:    s_mov_b64 s[4:5], 0xc0
; FIXEDABI-GISEL-NEXT:    s_load_dword s6, s[4:5], 0x0
; FIXEDABI-GISEL-NEXT:    s_mov_b64 s[4:5], 0xc4
; FIXEDABI-GISEL-NEXT:    s_load_dword s4, s[4:5], 0x0
; FIXEDABI-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; FIXEDABI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v0, vcc
; FIXEDABI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; FIXEDABI-GISEL-NEXT:    v_mov_b32_e32 v3, s6
; FIXEDABI-GISEL-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
; FIXEDABI-GISEL-NEXT:    v_mov_b32_e32 v4, s4
; FIXEDABI-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
; FIXEDABI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
; FIXEDABI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v4, vcc
; FIXEDABI-GISEL-NEXT:    v_mov_b32_e32 v4, 1
; FIXEDABI-GISEL-NEXT:    flat_store_dword v[2:3], v4
; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-GISEL-NEXT:    v_mov_b32_e32 v2, 2
; FIXEDABI-GISEL-NEXT:    flat_store_dword v[0:1], v2
; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; FIXEDABI-GISEL-NEXT:    s_setpc_b64 s[30:31]
  %flat.private = addrspacecast ptr addrspace(5) %ptr.private to ptr
  %flat.local = addrspacecast ptr addrspace(3) %ptr.local to ptr
  store volatile i32 1, ptr %flat.private
  store volatile i32 2, ptr %flat.local
  ret void
}

; llvm.amdgcn.is.shared on a flat pointer in a function marked
; amdgpu-no-queue-ptr; per the header comment this needs the queue ptr
; on gfx803.
; NOTE(review): the result is stored to an undef pointer; LangRef
; discourages new uses of undef in favour of poison. If this is changed,
; regenerate the assertions with update_llc_test_checks.py.
define void @is_shared_requires_queue_ptr(ptr %ptr) #0 {
; FIXEDABI-LABEL: is_shared_requires_queue_ptr:
; FIXEDABI:       ; %bb.0:
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FIXEDABI-NEXT:    s_mov_b64 s[4:5], 0xc4
; FIXEDABI-NEXT:    s_load_dword s4, s[4:5], 0x0
; FIXEDABI-NEXT:    s_waitcnt lgkmcnt(0)
; FIXEDABI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v1
; FIXEDABI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; FIXEDABI-NEXT:    flat_store_dword v[0:1], v0
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-NEXT:    s_setpc_b64 s[30:31]
  %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
  %zext = zext i1 %is.shared to i32
  store volatile i32 %zext, ptr addrspace(1) undef
  ret void
}

; llvm.amdgcn.is.private on a flat pointer in a function marked
; amdgpu-no-queue-ptr; per the header comment this needs the queue ptr
; on gfx803.
; NOTE(review): the result is stored to an undef pointer; LangRef
; discourages new uses of undef in favour of poison. If this is changed,
; regenerate the assertions with update_llc_test_checks.py.
define void @is_private_requires_queue_ptr(ptr %ptr) #0 {
; FIXEDABI-LABEL: is_private_requires_queue_ptr:
; FIXEDABI:       ; %bb.0:
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FIXEDABI-NEXT:    s_mov_b64 s[4:5], 0xc0
; FIXEDABI-NEXT:    s_load_dword s4, s[4:5], 0x0
; FIXEDABI-NEXT:    s_waitcnt lgkmcnt(0)
; FIXEDABI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v1
; FIXEDABI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; FIXEDABI-NEXT:    flat_store_dword v[0:1], v0
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
; FIXEDABI-NEXT:    s_setpc_b64 s[30:31]
  %is.private = call i1 @llvm.amdgcn.is.private(ptr %ptr)
  %zext = zext i1 %is.private to i32
  store volatile i32 %zext, ptr addrspace(1) undef
  ret void
}

; llvm.trap in a function marked amdgpu-no-queue-ptr; per the header
; comment the trap lowering requires the queue ptr.
define void @trap_requires_queue() #0 {
; FIXEDABI-LABEL: trap_requires_queue:
; FIXEDABI:       ; %bb.0:
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FIXEDABI-NEXT:    s_mov_b64 s[4:5], 0xc8
; FIXEDABI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; FIXEDABI-NEXT:    s_waitcnt lgkmcnt(0)
; FIXEDABI-NEXT:    s_trap 2
  call void @llvm.trap()
  unreachable
}

; llvm.debugtrap in a function marked amdgpu-no-queue-ptr.
define void @debugtrap_requires_queue() #0 {
; FIXEDABI-LABEL: debugtrap_requires_queue:
; FIXEDABI:       ; %bb.0:
; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FIXEDABI-NEXT:    s_trap 3
  call void @llvm.debugtrap()
  unreachable
}

; Intrinsics used by the tests above.
declare i32 @llvm.amdgcn.workitem.id.x()
declare i32 @llvm.amdgcn.workitem.id.y()
declare i32 @llvm.amdgcn.workitem.id.z()
declare i32 @llvm.amdgcn.workgroup.id.x()
declare i32 @llvm.amdgcn.workgroup.id.y()
declare i32 @llvm.amdgcn.workgroup.id.z()
declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr()
declare noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
declare i64 @llvm.amdgcn.dispatch.id()
declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
declare i1 @llvm.amdgcn.is.shared(ptr)
declare i1 @llvm.amdgcn.is.private(ptr)
declare void @llvm.trap()
declare void @llvm.debugtrap()

; The full set of "not needed" hints applied to every function defined above.
attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}