xref: /llvm-project/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll (revision ef067f52044042fbe1b6fa21a90bfdbcf1622b02)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-NO-BACKOFF %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-BACKOFF %s
4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX940-BACKOFF %s
5; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-back-off-barrier -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-NO-BACKOFF %s
6; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-BACKOFF %s
7; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-BACKOFF %s
8
9; Subtargets must wait for outstanding memory instructions before a barrier if
10; they cannot back off of the barrier.
11
; Load an i32 from %in, execute llvm.amdgcn.s.barrier, then store the loaded
; value to %out. There are no fences around the barrier in this function.
;
; Expected code according to the assertions below
;   * gfx900, and gfx90a with -mattr=-back-off-barrier, place
;     s_waitcnt vmcnt(0) lgkmcnt(0) before s_barrier.
;   * gfx90a, gfx940 and gfx1010 emit s_barrier directly after the load and
;     place s_waitcnt vmcnt(0) lgkmcnt(0) after it, ahead of the store.
;   * gfx1100 places s_waitcnt vmcnt(0) lgkmcnt(0) and
;     s_waitcnt_vscnt null, 0x0 before s_barrier.
; NOTE(review) the gfx1100 expectations wait before the barrier, like the
; no-back-off case, even though their check prefix contains "BACKOFF" --
; confirm that the prefix name is intended.
12define void @back_off_barrier_no_fence(ptr %in, ptr %out) #0 {
13; GFX9-NO-BACKOFF-LABEL: back_off_barrier_no_fence:
14; GFX9-NO-BACKOFF:       ; %bb.0:
15; GFX9-NO-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16; GFX9-NO-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
17; GFX9-NO-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
18; GFX9-NO-BACKOFF-NEXT:    s_barrier
19; GFX9-NO-BACKOFF-NEXT:    flat_store_dword v[2:3], v0
20; GFX9-NO-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
21; GFX9-NO-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
22;
23; GFX9-BACKOFF-LABEL: back_off_barrier_no_fence:
24; GFX9-BACKOFF:       ; %bb.0:
25; GFX9-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
26; GFX9-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
27; GFX9-BACKOFF-NEXT:    s_barrier
28; GFX9-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
29; GFX9-BACKOFF-NEXT:    flat_store_dword v[2:3], v0
30; GFX9-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
31; GFX9-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
32;
33; GFX940-BACKOFF-LABEL: back_off_barrier_no_fence:
34; GFX940-BACKOFF:       ; %bb.0:
35; GFX940-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36; GFX940-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
37; GFX940-BACKOFF-NEXT:    s_barrier
38; GFX940-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
39; GFX940-BACKOFF-NEXT:    flat_store_dword v[2:3], v0 sc0 sc1
40; GFX940-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
41; GFX940-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
42;
43; GFX10-BACKOFF-LABEL: back_off_barrier_no_fence:
44; GFX10-BACKOFF:       ; %bb.0:
45; GFX10-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46; GFX10-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
47; GFX10-BACKOFF-NEXT:    s_barrier
48; GFX10-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
49; GFX10-BACKOFF-NEXT:    flat_store_dword v[2:3], v0
50; GFX10-BACKOFF-NEXT:    s_waitcnt lgkmcnt(0)
51; GFX10-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
52;
53; GFX11-BACKOFF-LABEL: back_off_barrier_no_fence:
54; GFX11-BACKOFF:       ; %bb.0:
55; GFX11-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
56; GFX11-BACKOFF-NEXT:    flat_load_b32 v0, v[0:1]
57; GFX11-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
58; GFX11-BACKOFF-NEXT:    s_waitcnt_vscnt null, 0x0
59; GFX11-BACKOFF-NEXT:    s_barrier
60; GFX11-BACKOFF-NEXT:    flat_store_b32 v[2:3], v0
61; GFX11-BACKOFF-NEXT:    s_waitcnt lgkmcnt(0)
62; GFX11-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
63  %load = load i32, ptr %in
64  call void @llvm.amdgcn.s.barrier()
65  store i32 %load, ptr %out
66  ret void
67}
68
; Same load / barrier / store sequence as the no-fence variant, but the barrier
; is bracketed by a workgroup-scope release fence before it and a
; workgroup-scope acquire fence after it.
;
; Expected code according to the assertions below
;   * gfx900, and gfx90a with -mattr=-back-off-barrier, place
;     s_waitcnt vmcnt(0) lgkmcnt(0) before s_barrier.
;   * gfx90a and gfx940 place only s_waitcnt lgkmcnt(0) before s_barrier; the
;     s_waitcnt vmcnt(0) for the loaded value comes after the barrier.
;   * gfx1010 and gfx1100 place s_waitcnt vmcnt(0) lgkmcnt(0) and
;     s_waitcnt_vscnt null, 0x0 before s_barrier, and buffer_gl0_inv after it.
; NOTE(review) the waits before the barrier on the back-off-capable targets
; presumably come from lowering the release fence rather than from the barrier
; itself -- confirm against the backend before relying on this.
69define void @back_off_barrier_with_fence(ptr %in, ptr %out) #0 {
70; GFX9-NO-BACKOFF-LABEL: back_off_barrier_with_fence:
71; GFX9-NO-BACKOFF:       ; %bb.0:
72; GFX9-NO-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
73; GFX9-NO-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
74; GFX9-NO-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
75; GFX9-NO-BACKOFF-NEXT:    s_barrier
76; GFX9-NO-BACKOFF-NEXT:    flat_store_dword v[2:3], v0
77; GFX9-NO-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
78; GFX9-NO-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
79;
80; GFX9-BACKOFF-LABEL: back_off_barrier_with_fence:
81; GFX9-BACKOFF:       ; %bb.0:
82; GFX9-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83; GFX9-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
84; GFX9-BACKOFF-NEXT:    s_waitcnt lgkmcnt(0)
85; GFX9-BACKOFF-NEXT:    s_barrier
86; GFX9-BACKOFF-NEXT:    s_waitcnt vmcnt(0)
87; GFX9-BACKOFF-NEXT:    flat_store_dword v[2:3], v0
88; GFX9-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
89; GFX9-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
90;
91; GFX940-BACKOFF-LABEL: back_off_barrier_with_fence:
92; GFX940-BACKOFF:       ; %bb.0:
93; GFX940-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
94; GFX940-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
95; GFX940-BACKOFF-NEXT:    s_waitcnt lgkmcnt(0)
96; GFX940-BACKOFF-NEXT:    s_barrier
97; GFX940-BACKOFF-NEXT:    s_waitcnt vmcnt(0)
98; GFX940-BACKOFF-NEXT:    flat_store_dword v[2:3], v0 sc0 sc1
99; GFX940-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
100; GFX940-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
101;
102; GFX10-BACKOFF-LABEL: back_off_barrier_with_fence:
103; GFX10-BACKOFF:       ; %bb.0:
104; GFX10-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
105; GFX10-BACKOFF-NEXT:    flat_load_dword v0, v[0:1]
106; GFX10-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
107; GFX10-BACKOFF-NEXT:    s_waitcnt_vscnt null, 0x0
108; GFX10-BACKOFF-NEXT:    s_barrier
109; GFX10-BACKOFF-NEXT:    buffer_gl0_inv
110; GFX10-BACKOFF-NEXT:    flat_store_dword v[2:3], v0
111; GFX10-BACKOFF-NEXT:    s_waitcnt lgkmcnt(0)
112; GFX10-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
113;
114; GFX11-BACKOFF-LABEL: back_off_barrier_with_fence:
115; GFX11-BACKOFF:       ; %bb.0:
116; GFX11-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
117; GFX11-BACKOFF-NEXT:    flat_load_b32 v0, v[0:1]
118; GFX11-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
119; GFX11-BACKOFF-NEXT:    s_waitcnt_vscnt null, 0x0
120; GFX11-BACKOFF-NEXT:    s_barrier
121; GFX11-BACKOFF-NEXT:    buffer_gl0_inv
122; GFX11-BACKOFF-NEXT:    flat_store_b32 v[2:3], v0
123; GFX11-BACKOFF-NEXT:    s_waitcnt lgkmcnt(0)
124; GFX11-BACKOFF-NEXT:    s_setpc_b64 s[30:31]
125  %load = load i32, ptr %in
126  fence syncscope("workgroup") release
127  call void @llvm.amdgcn.s.barrier()
128  fence syncscope("workgroup") acquire
129  store i32 %load, ptr %out
130  ret void
131}
132
; Barrier intrinsic called by both test functions in this file.
133declare void @llvm.amdgcn.s.barrier()
134
; Attribute group #0 is attached to both test functions and marks them nounwind.
135attributes #0 = { nounwind }
136