; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -enable-ipra < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Both llc invocations above (with and without -enable-ipra) are verified
; against the same check prefix, so every expectation in this file has to hold
; in both configurations.

; Kernels are not called, so there is no call preserved mask.
; GCN-LABEL: {{^}}kernel:
; GCN: flat_store_dword
define amdgpu_kernel void @kernel(ptr addrspace(1) %out) #0 {
entry:
  store i32 0, ptr addrspace(1) %out
  ret void
}

; Callee shared by the call tests below. The empty inline asm clobbers v0-v7,
; and the check expects a reported VGPR count of 8.
; GCN-LABEL: {{^}}func:
; GCN: ; NumVgprs: 8
define hidden void @func() #1 {
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
  ret void
}

; A value is loaded before the call to @func and stored after it. The checks
; require it to sit in v8 for both the load and the store, and forbid
; buffer_store/buffer_load and readlane/writelane before the load and between
; the call and the store.
; GCN-LABEL: {{^}}kernel_call:
; GCN-NOT: buffer_store
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN-NOT: writelane
; GCN: flat_load_dword v8
; GCN: s_swappc_b64
; GCN-NOT: buffer_store
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN-NOT: writelane
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8

; GCN: ; TotalNumSgprs: 37
; GCN: ; NumVgprs: 9
define amdgpu_kernel void @kernel_call() #0 {
  %vgpr = load volatile i32, ptr addrspace(1) undef
  tail call void @func()
  store volatile i32 %vgpr, ptr addrspace(1) undef
  ret void
}

; Same load/call/store pattern from a non-kernel function. Only buffer_load and
; readlane are ruled out here.
; GCN-LABEL: {{^}}func_regular_call:
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN: flat_load_dword v8
; GCN: s_swappc_b64
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8

; GCN: ; TotalNumSgprs: 34
; GCN: ; NumVgprs: 10
define void @func_regular_call() #1 {
  %vgpr = load volatile i32, ptr addrspace(1) undef
  tail call void @func()
  store volatile i32 %vgpr, ptr addrspace(1) undef
  ret void
}

; A function whose body is only a tail call. The checks expect the
; s_getpc_b64 / s_add_u32 / s_addc_u32 sequence to be followed directly by
; s_setpc_b64.
; GCN-LABEL: {{^}}func_tail_call:
; GCN: s_waitcnt
; GCN-NEXT: s_getpc_b64 s[16:17]
; GCN-NEXT: s_add_u32 s16,
; GCN-NEXT: s_addc_u32 s17,
; GCN-NEXT: s_setpc_b64 s[16:17]

; GCN: ; TotalNumSgprs: 32
; GCN: ; NumVgprs: 8
define void @func_tail_call() #1 {
  tail call void @func()
  ret void
}

; A call followed by a second tail call to the same callee. The checks expect
; s_swappc_b64, then the store of the value kept in v8, then s_setpc_b64.
; GCN-LABEL: {{^}}func_call_tail_call:
; GCN: flat_load_dword v8
; GCN: s_swappc_b64
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8
; GCN: s_setpc_b64

; GCN: ; TotalNumSgprs: 34
; GCN: ; NumVgprs: 10
define void @func_call_tail_call() #1 {
  %vgpr = load volatile i32, ptr addrspace(1) undef
  tail call void @func()
  store volatile i32 %vgpr, ptr addrspace(1) undef
  tail call void @func()
  ret void
}

; Empty noinline callee used by @test_funcx2; only its label is checked.
; GCN-LABEL: {{^}}void_func_void:
define void @void_func_void() noinline {
  ret void
}

; Make sure we don't get save/restore of FP between calls.
; The checks forbid any mention of s32 between s_getpc_b64 and the first call,
; and between the two calls.
; GCN-LABEL: {{^}}test_funcx2:
; GCN: s_getpc_b64
; GCN-NOT: s32
; GCN: s_swappc_b64
; GCN-NOT: s32
; GCN: s_swappc_b64
define void @test_funcx2() #0 {
  call void @void_func_void()
  call void @void_func_void()
  ret void
}

; Make sure we save/restore the return address around the call.
; The checks expect s30 and s31 to be written into lanes of a single VGPR
; before the call and read back from that same VGPR afterwards.
; Function Attrs: norecurse
define internal void @hoge() #2 {
bb:
; GCN-LABEL: {{^}}hoge:
; GCN-DAG: v_writelane_b32 [[CSR_VGPR:v[0-9]+]], s30,
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31,
; GCN: s_swappc_b64 s[30:31]
; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]],
; GCN-DAG: v_readlane_b32 s31, [[CSR_VGPR]],
; GCN: s_waitcnt vmcnt(0)
; GCN: s_setpc_b64 s[30:31]
  call void @eggs()
  ret void
}

; Kernel that calls @hoge; only its label is checked.
; GCN-LABEL: {{^}}wombat:
define weak amdgpu_kernel void @wombat(ptr %arg, ptr %arg2) {
bb:
  call void @hoge() #0
  ret void
}

; External callee of @hoge; this module contains only its declaration.
declare dso_local void @eggs()


attributes #0 = { nounwind }
attributes #1 = { nounwind noinline "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
attributes #2 = { norecurse }