; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; Each function in this file pairs a memory access with a call or tail call.
; The expected output pins where s_waitcnt does and does not appear relative
; to the s_swappc_b64 / s_setpc_b64 that transfers control to the callee.

; The call argument is produced by a load whose result is still outstanding at
; the call; the wait for it before the call should be skipped.
; The argument comes from a volatile load through an addrspace(3) pointer. The
; only wait in the expected output is the lgkmcnt(0) for the kernel-argument
; s_load_dword that feeds the load address; nothing waits on the ds_read_b32
; before s_swappc_b64.
; NOTE(review): presumably the callee is responsible for that wait (the
; non-kernel function tailcall_got_load below begins with
; s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)) -- confirm against the calling
; convention.
define amdgpu_kernel void @call_memory_arg_load(ptr addrspace(3) %ptr, i32) #0 {
; GCN-LABEL: call_memory_arg_load:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s6, s[6:7], 0x0
; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11
; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
; GCN-NEXT: s_add_u32 s0, s0, s11
; GCN-NEXT: s_addc_u32 s1, s1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s6
; GCN-NEXT: ds_read_b32 v0, v0
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_getpc_b64 s[8:9]
; GCN-NEXT: s_add_u32 s8, s8, func@rel32@lo+4
; GCN-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
; GCN-NEXT: s_endpgm
  %vgpr = load volatile i32, ptr addrspace(3) %ptr
  call void @func(i32 %vgpr)
  ret void
}

; Memory waitcnt with no register dependence on the call.
; The store before the call writes a constant 0 that is unrelated to the call
; argument. The expected output waits (lgkmcnt(0)) only for the s_load_dwordx2
; that produces the store address; there is no s_waitcnt between the
; global_store_dword and s_swappc_b64.
define amdgpu_kernel void @call_memory_no_dep(ptr addrspace(1) %ptr, i32) #0 {
; GCN-LABEL: call_memory_no_dep:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11
; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
; GCN-NEXT: s_add_u32 s0, s0, s11
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_addc_u32 s1, s1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_store_dword v0, v0, s[6:7]
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_getpc_b64 s[8:9]
; GCN-NEXT: s_add_u32 s8, s8, func@rel32@lo+4
; GCN-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
; GCN-NEXT: s_endpgm
  store i32 0, ptr addrspace(1) %ptr
  call void @func(i32 0)
  ret void
}

; Should not wait after the call and before the memory access.
; The store address in s[34:35] is loaded by s_load_dwordx2 before the call,
; yet the expected global_store_dword immediately follows s_swappc_b64 with no
; s_waitcnt in between.
define amdgpu_kernel void @call_no_wait_after_call(ptr addrspace(1) %ptr, i32) #0 {
; GCN-LABEL: call_no_wait_after_call:
; GCN: ; %bb.0:
; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11
; GCN-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
; GCN-NEXT: s_add_u32 s0, s0, s11
; GCN-NEXT: s_addc_u32 s1, s1, 0
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_getpc_b64 s[8:9]
; GCN-NEXT: s_add_u32 s8, s8, func@rel32@lo+4
; GCN-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12
; GCN-NEXT: v_mov_b32_e32 v40, 0
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
; GCN-NEXT: global_store_dword v40, v40, s[34:35]
; GCN-NEXT: s_endpgm
  call void @func(i32 0)
  store i32 0, ptr addrspace(1) %ptr
  ret void
}

; Same shape as call_no_wait_after_call, but the stored value is the call's
; return value (v0 in the expected output). There is still no s_waitcnt
; between s_swappc_b64 and the global_store_dword.
define amdgpu_kernel void @call_no_wait_after_call_return_val(ptr addrspace(1) %ptr, i32) #0 {
; GCN-LABEL: call_no_wait_after_call_return_val:
; GCN: ; %bb.0:
; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11
; GCN-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
; GCN-NEXT: s_add_u32 s0, s0, s11
; GCN-NEXT: s_addc_u32 s1, s1, 0
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_getpc_b64 s[8:9]
; GCN-NEXT: s_add_u32 s8, s8, func.return@rel32@lo+4
; GCN-NEXT: s_addc_u32 s9, s9, func.return@rel32@hi+12
; GCN-NEXT: v_mov_b32_e32 v40, 0
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
; GCN-NEXT: global_store_dword v40, v0, s[34:35]
; GCN-NEXT: s_endpgm
  %rv = call i32 @func.return(i32 0)
  store i32 %rv, ptr addrspace(1) %ptr
  ret void
}

; Need to wait for the address dependency.
; The address of @got.func is fetched with an s_load_dwordx2 through a
; gotpcrel32 relocation, so the expected output has s_waitcnt lgkmcnt(0)
; before the s_swappc_b64 that consumes s[8:9].
define amdgpu_kernel void @call_got_load(ptr addrspace(1) %ptr, i32) #0 {
; GCN-LABEL: call_got_load:
; GCN: ; %bb.0:
; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11
; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
; GCN-NEXT: s_add_u32 s0, s0, s11
; GCN-NEXT: s_addc_u32 s1, s1, 0
; GCN-NEXT: s_getpc_b64 s[6:7]
; GCN-NEXT: s_add_u32 s6, s6, got.func@gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s7, s7, got.func@gotpcrel32@hi+12
; GCN-NEXT: s_load_dwordx2 s[8:9], s[6:7], 0x0
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
; GCN-NEXT: s_endpgm
  call void @got.func(i32 0)
  ret void
}

; Need to wait for the address dependency.
; Tail-call variant of call_got_load in a non-kernel function: the loaded
; callee address in s[4:5] must be waited on (s_waitcnt lgkmcnt(0)) before
; s_setpc_b64 jumps to it.
define void @tailcall_got_load(ptr addrspace(1) %ptr, i32) #0 {
; GCN-LABEL: tailcall_got_load:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, got.func@gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, got.func@gotpcrel32@hi+12
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[4:5]
  tail call void @got.func(i32 0)
  ret void
}

; No need to wait for the load.
; The value read by ds_read_b32 is passed in v0 straight to the tail-called
; @func. The expected output has no s_waitcnt between the ds_read_b32 and the
; s_setpc_b64; the only wait is the one at function entry.
define void @tail_call_memory_arg_load(ptr addrspace(3) %ptr, i32) #0 {
; GCN-LABEL: tail_call_memory_arg_load:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: ds_read_b32 v0, v0
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, func@rel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, func@rel32@hi+12
; GCN-NEXT: s_setpc_b64 s[4:5]
  %vgpr = load volatile i32, ptr addrspace(3) %ptr
  tail call void @func(i32 %vgpr)
  ret void
}

; External callees used by the tests. @func and @func.return are declared
; hidden and are reached through rel32 PC-relative addresses in the expected
; output; @got.func is not hidden and its address is loaded through a
; gotpcrel32 relocation instead.
declare hidden void @func(i32) #0
declare hidden i32 @func.return(i32) #0
declare void @got.func(i32) #0

; Attribute group applied to every definition and declaration in this file.
attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }

; Module flag setting amdhsa_code_object_version to 500.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}