; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -S -mcpu=gfx900 -amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s
; RUN: opt -S -mcpu=gfx900 -passes=amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
target triple = "amdgcn--"

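; The loads below should each lower to a @llvm.amdgcn.raw.ptr.buffer.load call
; on the addrspace(8) resource, with the GEP folded into the constant offset
; operand (4 floats = 16 bytes) and the IR alignment carried on the pointer
; argument. !nontemporal and !invariant.load metadata are propagated to the
; calls, volatile accesses set the high bit of the final aux operand (printed
; as -2147483648), and atomic loads become
; @llvm.amdgcn.raw.ptr.atomic.buffer.load bracketed by whatever fences the
; ordering requires.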
define void @loads(ptr addrspace(8) %buf) {
; CHECK-LABEL: define void @loads
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    [[SCALAR:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    [[VEC2:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) align 8 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    [[VEC4:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    [[NONTEMPORAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0), !nontemporal [[META0:![0-9]+]]
; CHECK-NEXT:    [[INVARIANT:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0), !invariant.load [[META1:![0-9]+]]
; CHECK-NEXT:    [[NONTEMPORAL_INVARIANT:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0), !invariant.load [[META1]], !nontemporal [[META0]]
; CHECK-NEXT:    [[VOLATILE:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
; CHECK-NEXT:    [[VOLATILE_NONTEMPORAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648), !nontemporal [[META0]]
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[ATOMIC:%.*]] = call float @llvm.amdgcn.raw.ptr.atomic.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    [[ATOMIC_MONOTONIC:%.*]] = call float @llvm.amdgcn.raw.ptr.atomic.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    [[ATOMIC_ACQUIRE:%.*]] = call float @llvm.amdgcn.raw.ptr.atomic.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence acquire
; CHECK-NEXT:    ret void
;
  %base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %p = getelementptr float, ptr addrspace(7) %base, i32 4

  %scalar = load float, ptr addrspace(7) %p, align 4
  %vec2 = load <2 x float>, ptr addrspace(7) %p, align 8
  %vec4 = load <4 x float>, ptr addrspace(7) %p, align 16

  %nontemporal = load float, ptr addrspace(7) %p, !nontemporal !0
  %invariant = load float, ptr addrspace(7) %p, !invariant.load !1
  %nontemporal.invariant = load float, ptr addrspace(7) %p, !nontemporal !0, !invariant.load !1

  %volatile = load volatile float, ptr addrspace(7) %p
  %volatile.nontemporal = load volatile float, ptr addrspace(7) %p, !nontemporal !0

  %atomic = load atomic volatile float, ptr addrspace(7) %p syncscope("wavefront") seq_cst, align 4
  %atomic.monotonic = load atomic float, ptr addrspace(7) %p syncscope("wavefront") monotonic, align 4
  %atomic.acquire = load atomic float, ptr addrspace(7) %p acquire, align 4

  ret void
}

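; Stores lower the same way, to @llvm.amdgcn.raw.ptr.buffer.store, with the
; same metadata, volatile-bit, and fence handling as the loads above.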
define void @stores(ptr addrspace(8) %buf, float %f, <4 x float> %f4) {
; CHECK-LABEL: define void @stores
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], float [[F:%.*]], <4 x float> [[F4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> [[F4]], ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0), !nontemporal [[META0]]
; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648), !nontemporal [[META0]]
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence release
; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    ret void
;
  %base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %p = getelementptr float, ptr addrspace(7) %base, i32 4

  store float %f, ptr addrspace(7) %p, align 4
  store <4 x float> %f4, ptr addrspace(7) %p, align 16

  store float %f, ptr addrspace(7) %p, !nontemporal !0

  store volatile float %f, ptr addrspace(7) %p
  store volatile float %f, ptr addrspace(7) %p, !nontemporal !0

  store atomic volatile float %f, ptr addrspace(7) %p syncscope("wavefront") seq_cst, align 4
  store atomic float %f, ptr addrspace(7) %p syncscope("wavefront") monotonic, align 4
  store atomic float %f, ptr addrspace(7) %p release, align 4

  ret void
}

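; Each atomicrmw operation should map to the correspondingly named
; @llvm.amdgcn.raw.ptr.buffer.atomic.* intrinsic, with the seq_cst ordering
; expressed as release/acquire fences bracketing each call.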
define void @atomicrmw(ptr addrspace(8) %buf, float %f, i32 %i) {
; CHECK-LABEL: define void @atomicrmw
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], float [[F:%.*]], i32 [[I:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[XCHG:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[ADD:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[SUB:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.sub.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[AND:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.and.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[OR:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.or.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[XOR:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.xor.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[MIN:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smin.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[MAX:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smax.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umin.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[UMAX:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umax.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[FADD:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[FMAX:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[FMIN:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    ret void
;
  %base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %p = getelementptr float, ptr addrspace(7) %base, i32 4

  ; Fence insertion is tested by loads and stores
  %xchg = atomicrmw xchg ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %add = atomicrmw add ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %sub = atomicrmw sub ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %and = atomicrmw and ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %or = atomicrmw or ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %xor = atomicrmw xor ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %min = atomicrmw min ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %max = atomicrmw max ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %umin = atomicrmw umin ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %umax = atomicrmw umax ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4

  %fadd = atomicrmw fadd ptr addrspace(7) %p, float %f syncscope("wavefront") seq_cst, align 4
  %fmax = atomicrmw fmax ptr addrspace(7) %p, float %f syncscope("wavefront") seq_cst, align 4
  %fmin = atomicrmw fmin ptr addrspace(7) %p, float %f syncscope("wavefront") seq_cst, align 4

  ; Check a no-return atomic
  atomicrmw add ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4

  ret void
}

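; cmpxchg lowers to @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap, and the
; { value, success } result is rebuilt by comparing the value the intrinsic
; returns against the expected one. The volatile cmpxchg sets the high bit of
; the aux operand (-2147483648), as with the volatile loads and stores.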
define {i32, i1} @cmpxchg(ptr addrspace(8) %buf, i32 %wanted, i32 %new) {
; CHECK-LABEL: define { i32, i1 } @cmpxchg
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[WANTED:%.*]], i32 [[NEW:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[RET:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 [[NEW]], i32 [[WANTED]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i32, i1 } poison, i32 [[RET]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[RET]], [[WANTED]]
; CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { i32, i1 } [[TMP1]], i1 [[TMP2]], 1
; CHECK-NEXT:    ret { i32, i1 } [[TMP3]]
;
  %base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %p = getelementptr i32, ptr addrspace(7) %base, i32 4

  %ret = cmpxchg volatile ptr addrspace(7) %p, i32 %wanted, i32 %new syncscope("wavefront") acq_rel monotonic, align 4
  ret {i32, i1} %ret
}

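; For a weak cmpxchg the lowering skips the comparison entirely and leaves the
; success bit of the result poison.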
define {i32, i1} @cmpxchg_weak(ptr addrspace(8) %buf, i32 %wanted, i32 %new) {
; CHECK-LABEL: define { i32, i1 } @cmpxchg_weak
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[WANTED:%.*]], i32 [[NEW:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[RET:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 [[NEW]], i32 [[WANTED]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i32, i1 } poison, i32 [[RET]], 0
; CHECK-NEXT:    ret { i32, i1 } [[TMP1]]
;
  %base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %p = getelementptr i32, ptr addrspace(7) %base, i32 4

  %ret = cmpxchg weak ptr addrspace(7) %p, i32 %wanted, i32 %new syncscope("wavefront") acq_rel monotonic, align 4
  ret {i32, i1} %ret
}

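; !0 is the !nontemporal operand and !1 the !invariant.load operand used above.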
!0 = ! { i32 1 }
!1 = ! { }