; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-NO-BACKOFF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-BACKOFF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX940-BACKOFF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-back-off-barrier -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-NO-BACKOFF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-BACKOFF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-BACKOFF %s

; Subtargets must wait for outstanding memory instructions before a barrier if
; they cannot back off of the barrier.
;
; The run lines above compile this file for gfx900, gfx90a, gfx940, gfx1010 and
; gfx1100. gfx90a is compiled twice: once with its default features and once
; with -mattr=-back-off-barrier. The gfx900 run and the gfx90a run with the
; feature disabled share a single set of expected output.

; A load, a barrier, and a store of the loaded value, with no fences around the
; barrier. In the expected output for gfx900 and for gfx90a with the feature
; disabled, the s_waitcnt for the load comes before s_barrier; for gfx90a,
; gfx940 and gfx1010 it comes after s_barrier.
; NOTE(review): the gfx1100 expectations below place both waits before
; s_barrier, unlike the other configurations whose prefix name says they back
; off. Confirm this matches the compiler revision the checks were generated
; with before relying on the prefix name.
define void @back_off_barrier_no_fence(ptr %in, ptr %out) #0 {
; GFX9-NO-BACKOFF-LABEL: back_off_barrier_no_fence:
; GFX9-NO-BACKOFF:       ; %bb.0:
; GFX9-NO-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NO-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
; GFX9-NO-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NO-BACKOFF-NEXT:    s_barrier
; GFX9-NO-BACKOFF-NEXT:    flat_store_dword v[2:3], v0
; GFX9-NO-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NO-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-BACKOFF-LABEL: back_off_barrier_no_fence:
; GFX9-BACKOFF:       ; %bb.0:
; GFX9-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
; GFX9-BACKOFF-NEXT:    s_barrier
; GFX9-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-BACKOFF-NEXT:    flat_store_dword v[2:3], v0
; GFX9-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-BACKOFF-LABEL: back_off_barrier_no_fence:
; GFX940-BACKOFF:       ; %bb.0:
; GFX940-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
; GFX940-BACKOFF-NEXT:    s_barrier
; GFX940-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-BACKOFF-NEXT:    flat_store_dword v[2:3], v0 sc0 sc1
; GFX940-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-BACKOFF-LABEL: back_off_barrier_no_fence:
; GFX10-BACKOFF:       ; %bb.0:
; GFX10-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
; GFX10-BACKOFF-NEXT:    s_barrier
; GFX10-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-BACKOFF-NEXT:    flat_store_dword v[2:3], v0
; GFX10-BACKOFF-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-BACKOFF-LABEL: back_off_barrier_no_fence:
; GFX11-BACKOFF:       ; %bb.0:
; GFX11-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-BACKOFF-NEXT:    flat_load_b32 v0, v[0:1]
; GFX11-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-BACKOFF-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-BACKOFF-NEXT:    s_barrier
; GFX11-BACKOFF-NEXT:    flat_store_b32 v[2:3], v0
; GFX11-BACKOFF-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
  %load = load i32, ptr %in
  call void @llvm.amdgcn.s.barrier()
  store i32 %load, ptr %out
  ret void
}

; The same load / barrier / store sequence, with a workgroup-scope release
; fence before the barrier and a workgroup-scope acquire fence after it.
; In the expected output for gfx90a and gfx940 only lgkmcnt is waited on before
; s_barrier and vmcnt is waited on after it. For gfx1010 and gfx1100 all waits
; come before s_barrier and a buffer_gl0_inv follows it. The gfx900 / disabled
; feature expectations are the same as in the function without fences.
define void @back_off_barrier_with_fence(ptr %in, ptr %out) #0 {
; GFX9-NO-BACKOFF-LABEL: back_off_barrier_with_fence:
; GFX9-NO-BACKOFF:       ; %bb.0:
; GFX9-NO-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NO-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
; GFX9-NO-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NO-BACKOFF-NEXT:    s_barrier
; GFX9-NO-BACKOFF-NEXT:    flat_store_dword v[2:3], v0
; GFX9-NO-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NO-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-BACKOFF-LABEL: back_off_barrier_with_fence:
; GFX9-BACKOFF:       ; %bb.0:
; GFX9-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
; GFX9-BACKOFF-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-BACKOFF-NEXT:    s_barrier
; GFX9-BACKOFF-NEXT:    s_waitcnt vmcnt(0)
; GFX9-BACKOFF-NEXT:    flat_store_dword v[2:3], v0
; GFX9-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-BACKOFF-LABEL: back_off_barrier_with_fence:
; GFX940-BACKOFF:       ; %bb.0:
; GFX940-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
; GFX940-BACKOFF-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-BACKOFF-NEXT:    s_barrier
; GFX940-BACKOFF-NEXT:    s_waitcnt vmcnt(0)
; GFX940-BACKOFF-NEXT:    flat_store_dword v[2:3], v0 sc0 sc1
; GFX940-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-BACKOFF-LABEL: back_off_barrier_with_fence:
; GFX10-BACKOFF:       ; %bb.0:
; GFX10-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
; GFX10-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-BACKOFF-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-BACKOFF-NEXT:    s_barrier
; GFX10-BACKOFF-NEXT:    buffer_gl0_inv
; GFX10-BACKOFF-NEXT:    flat_store_dword v[2:3], v0
; GFX10-BACKOFF-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-BACKOFF-LABEL: back_off_barrier_with_fence:
; GFX11-BACKOFF:       ; %bb.0:
; GFX11-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-BACKOFF-NEXT:    flat_load_b32 v0, v[0:1]
; GFX11-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-BACKOFF-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-BACKOFF-NEXT:    s_barrier
; GFX11-BACKOFF-NEXT:    buffer_gl0_inv
; GFX11-BACKOFF-NEXT:    flat_store_b32 v[2:3], v0
; GFX11-BACKOFF-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
  %load = load i32, ptr %in
  fence syncscope("workgroup") release
  call void @llvm.amdgcn.s.barrier()
  fence syncscope("workgroup") acquire
  store i32 %load, ptr %out
  ret void
}

; Barrier intrinsic called by both functions above.
declare void @llvm.amdgcn.s.barrier()

; Attribute group #0 is attached to both test functions.
attributes #0 = { nounwind }