xref: /llvm-project/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll (revision 2a95022cff38dc0978f527ae580b5720eb9e4d98)
1; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
3
; Monotonic atomic i8 load (align 1) straight from the addrspace(3) argument.
; Expected output: after the initial s_waitcnt, an s_mov_b32 m0 on the next
; line for the CI prefix only (the GFX9 prefix forbids it), then ds_read_u8
; with nothing after the address operand, s_waitcnt lgkmcnt(0), s_setpc_b64.
4; GCN-LABEL: {{^}}atomic_load_monotonic_i8:
5; GCN: s_waitcnt
6; GFX9-NOT: s_mov_b32 m0
7; CI-NEXT: s_mov_b32 m0
8; GCN-NEXT: ds_read_u8 v0, v0{{$}}
9; GCN-NEXT: s_waitcnt lgkmcnt(0)
10; GCN-NEXT: s_setpc_b64
11define i8 @atomic_load_monotonic_i8(ptr addrspace(3) %ptr) {
12  %load = load atomic i8, ptr addrspace(3) %ptr monotonic, align 1
13  ret i8 %load
14}
15
; Monotonic atomic i8 load from the addrspace(3) argument advanced by a
; constant GEP of 16 i8 elements. The checks expect that constant to appear
; as the immediate offset:16 on ds_read_u8 (line must end after the offset),
; with the m0 write present for the CI prefix and absent for the GFX9 prefix.
16; GCN-LABEL: {{^}}atomic_load_monotonic_i8_offset:
17; GCN: s_waitcnt
18; GFX9-NOT: s_mov_b32 m0
19; CI-NEXT: s_mov_b32 m0
20; GCN-NEXT: ds_read_u8 v0, v0 offset:16{{$}}
21; GCN-NEXT: s_waitcnt lgkmcnt(0)
22; GCN-NEXT: s_setpc_b64
23define i8 @atomic_load_monotonic_i8_offset(ptr addrspace(3) %ptr) {
24  %gep = getelementptr inbounds i8, ptr addrspace(3) %ptr, i8 16
25  %load = load atomic i8, ptr addrspace(3) %gep monotonic, align 1
26  ret i8 %load
27}
28
; Monotonic atomic i16 load (align 2) straight from the addrspace(3) argument.
; Expected output: ds_read_u16 with no offset operand, preceded by an
; s_mov_b32 m0 for the CI prefix only, followed by s_waitcnt lgkmcnt(0) and
; s_setpc_b64.
29; GCN-LABEL: {{^}}atomic_load_monotonic_i16:
30; GCN: s_waitcnt
31; GFX9-NOT: s_mov_b32 m0
32; CI-NEXT: s_mov_b32 m0
33; GCN-NEXT: ds_read_u16 v0, v0{{$}}
34; GCN-NEXT: s_waitcnt lgkmcnt(0)
35; GCN-NEXT: s_setpc_b64
36define i16 @atomic_load_monotonic_i16(ptr addrspace(3) %ptr) {
37  %load = load atomic i16, ptr addrspace(3) %ptr monotonic, align 2
38  ret i16 %load
39}
40
; Monotonic atomic i16 load from the addrspace(3) argument advanced by a
; constant GEP of 16 i16 elements. The checks expect the resulting byte
; distance (16 elements * 2 bytes) as the immediate offset:32 on ds_read_u16,
; with the m0 write present for the CI prefix and absent for the GFX9 prefix.
41; GCN-LABEL: {{^}}atomic_load_monotonic_i16_offset:
42; GCN: s_waitcnt
43; GFX9-NOT: s_mov_b32 m0
44; CI-NEXT: s_mov_b32 m0
45; GCN-NEXT: ds_read_u16 v0, v0 offset:32{{$}}
46; GCN-NEXT: s_waitcnt lgkmcnt(0)
47; GCN-NEXT: s_setpc_b64
48define i16 @atomic_load_monotonic_i16_offset(ptr addrspace(3) %ptr) {
49  %gep = getelementptr inbounds i16, ptr addrspace(3) %ptr, i16 16
50  %load = load atomic i16, ptr addrspace(3) %gep monotonic, align 2
51  ret i16 %load
52}
53
; Monotonic atomic i32 load (align 4) straight from the addrspace(3) argument.
; Expected output: ds_read_b32 with no offset operand, preceded by an
; s_mov_b32 m0 for the CI prefix only, followed by s_waitcnt lgkmcnt(0) and
; s_setpc_b64.
54; GCN-LABEL: {{^}}atomic_load_monotonic_i32:
55; GCN: s_waitcnt
56; GFX9-NOT: s_mov_b32 m0
57; CI-NEXT: s_mov_b32 m0
58; GCN-NEXT: ds_read_b32 v0, v0{{$}}
59; GCN-NEXT: s_waitcnt lgkmcnt(0)
60; GCN-NEXT: s_setpc_b64
61define i32 @atomic_load_monotonic_i32(ptr addrspace(3) %ptr) {
62  %load = load atomic i32, ptr addrspace(3) %ptr monotonic, align 4
63  ret i32 %load
64}
65
; Monotonic atomic i32 load from the addrspace(3) argument advanced by a
; constant GEP of 16 i32 elements. The checks expect the resulting byte
; distance (16 elements * 4 bytes) as the immediate offset:64 on ds_read_b32,
; with the m0 write present for the CI prefix and absent for the GFX9 prefix.
66; GCN-LABEL: {{^}}atomic_load_monotonic_i32_offset:
67; GCN: s_waitcnt
68; GFX9-NOT: s_mov_b32 m0
69; CI-NEXT: s_mov_b32 m0
70; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
71; GCN-NEXT: s_waitcnt lgkmcnt(0)
72; GCN-NEXT: s_setpc_b64
73define i32 @atomic_load_monotonic_i32_offset(ptr addrspace(3) %ptr) {
74  %gep = getelementptr inbounds i32, ptr addrspace(3) %ptr, i32 16
75  %load = load atomic i32, ptr addrspace(3) %gep monotonic, align 4
76  ret i32 %load
77}
78
; Monotonic atomic i64 load (align 8) straight from the addrspace(3) argument.
; Expected output: a single ds_read_b64 into the v[0:1] register pair with no
; offset operand, preceded by an s_mov_b32 m0 for the CI prefix only, followed
; by s_waitcnt lgkmcnt(0) and s_setpc_b64.
79; GCN-LABEL: {{^}}atomic_load_monotonic_i64:
80; GCN: s_waitcnt
81; GFX9-NOT: s_mov_b32 m0
82; CI-NEXT: s_mov_b32 m0
83; GCN-NEXT: ds_read_b64 v[0:1], v0{{$}}
84; GCN-NEXT: s_waitcnt lgkmcnt(0)
85; GCN-NEXT: s_setpc_b64
86define i64 @atomic_load_monotonic_i64(ptr addrspace(3) %ptr) {
87  %load = load atomic i64, ptr addrspace(3) %ptr monotonic, align 8
88  ret i64 %load
89}
90
; Monotonic atomic i64 load from the addrspace(3) argument advanced by a
; constant GEP of 16 i64 elements. The checks expect the resulting byte
; distance (16 elements * 8 bytes) as the immediate offset:128 on a single
; ds_read_b64 into v[0:1], with the m0 write present for the CI prefix and
; absent for the GFX9 prefix.
91; GCN-LABEL: {{^}}atomic_load_monotonic_i64_offset:
92; GCN: s_waitcnt
93; GFX9-NOT: s_mov_b32 m0
94; CI-NEXT: s_mov_b32 m0
95; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
96; GCN-NEXT: s_waitcnt lgkmcnt(0)
97; GCN-NEXT: s_setpc_b64
98define i64 @atomic_load_monotonic_i64_offset(ptr addrspace(3) %ptr) {
99  %gep = getelementptr inbounds i64, ptr addrspace(3) %ptr, i32 16
100  %load = load atomic i64, ptr addrspace(3) %gep monotonic, align 8
101  ret i64 %load
102}
103
; Monotonic atomic float load from the addrspace(3) argument advanced by a
; constant GEP of 16 float elements. The checks expect the same instruction
; sequence as the i32 offset case: ds_read_b32 with immediate offset:64, with
; the m0 write present for the CI prefix and absent for the GFX9 prefix.
104; GCN-LABEL: {{^}}atomic_load_monotonic_f32_offset:
105; GCN: s_waitcnt
106; GFX9-NOT: s_mov_b32 m0
107; CI-NEXT: s_mov_b32 m0
108; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
109; GCN-NEXT: s_waitcnt lgkmcnt(0)
110; GCN-NEXT: s_setpc_b64
111define float @atomic_load_monotonic_f32_offset(ptr addrspace(3) %ptr) {
112  %gep = getelementptr inbounds float, ptr addrspace(3) %ptr, i32 16
113  %load = load atomic float, ptr addrspace(3) %gep monotonic, align 4
114  ret float %load
115}
116
; Monotonic atomic double load from the addrspace(3) argument advanced by a
; constant GEP of 16 double elements. The checks expect the same instruction
; sequence as the i64 offset case: one ds_read_b64 into v[0:1] with immediate
; offset:128, with the m0 write present for the CI prefix and absent for the
; GFX9 prefix.
117; GCN-LABEL: {{^}}atomic_load_monotonic_f64_offset:
118; GCN: s_waitcnt
119; GFX9-NOT: s_mov_b32 m0
120; CI-NEXT: s_mov_b32 m0
121; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
122; GCN-NEXT: s_waitcnt lgkmcnt(0)
123; GCN-NEXT: s_setpc_b64
124define double @atomic_load_monotonic_f64_offset(ptr addrspace(3) %ptr) {
125  %gep = getelementptr inbounds double, ptr addrspace(3) %ptr, i32 16
126  %load = load atomic double, ptr addrspace(3) %gep monotonic, align 8
127  ret double %load
128}
129
; Monotonic atomic load of a default-address-space pointer value (align 8)
; from the addrspace(3) argument advanced by a constant GEP of 16 such
; pointers. The checks expect one ds_read_b64 into v[0:1] with immediate
; offset:128 (i.e. 8 bytes per element), with the m0 write present for the CI
; prefix and absent for the GFX9 prefix.
130; GCN-LABEL: {{^}}atomic_load_monotonic_p0i8_offset:
131; GCN: s_waitcnt
132; GFX9-NOT: s_mov_b32 m0
133; CI-NEXT: s_mov_b32 m0
134; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
135; GCN-NEXT: s_waitcnt lgkmcnt(0)
136; GCN-NEXT: s_setpc_b64
137define ptr @atomic_load_monotonic_p0i8_offset(ptr addrspace(3) %ptr) {
138  %gep = getelementptr inbounds ptr, ptr addrspace(3) %ptr, i32 16
139  %load = load atomic ptr, ptr addrspace(3) %gep monotonic, align 8
140  ret ptr %load
141}
142
; Monotonic atomic load of an addrspace(3) pointer value (align 4) from the
; addrspace(3) argument advanced by a constant GEP of 16 such pointers. The
; checks expect ds_read_b32 with immediate offset:64 (i.e. 4 bytes per
; element), with the m0 write present for the CI prefix and absent for the
; GFX9 prefix.
143; GCN-LABEL: {{^}}atomic_load_monotonic_p3i8_offset:
144; GCN: s_waitcnt
145; GFX9-NOT: s_mov_b32 m0
146; CI-NEXT: s_mov_b32 m0
147; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
148; GCN-NEXT: s_waitcnt lgkmcnt(0)
149; GCN-NEXT: s_setpc_b64
150define ptr addrspace(3) @atomic_load_monotonic_p3i8_offset(ptr addrspace(3) %ptr) {
151  %gep = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %ptr, i32 16
152  %load = load atomic ptr addrspace(3), ptr addrspace(3) %gep monotonic, align 4
153  ret ptr addrspace(3) %load
154}
155
; Monotonic atomic half load (align 2) straight from the addrspace(3)
; argument; the loaded value is bitcast to i16 and returned as an integer.
; The checks expect the same sequence as the i16 case: ds_read_u16 with no
; offset operand and no instruction between it and the following s_waitcnt,
; with the m0 write present for the CI prefix and absent for the GFX9 prefix.
156; GCN-LABEL: {{^}}atomic_load_monotonic_f16:
157; GCN: s_waitcnt
158; GFX9-NOT: s_mov_b32 m0
159; CI-NEXT: s_mov_b32 m0
160; GCN-NEXT: ds_read_u16 v0, v0{{$}}
161; GCN-NEXT: s_waitcnt lgkmcnt(0)
162; GCN-NEXT: s_setpc_b64
163define i16 @atomic_load_monotonic_f16(ptr addrspace(3) %ptr) {
164  %load = load atomic half, ptr addrspace(3) %ptr monotonic, align 2
165  %ret = bitcast half %load to i16
166  ret i16 %ret
167}
168
; Monotonic atomic half load from the addrspace(3) argument advanced by a
; constant GEP of 16 half elements; the loaded value is bitcast to i16 and
; returned as an integer. The checks expect ds_read_u16 with immediate
; offset:32 (16 elements * 2 bytes), with the m0 write present for the CI
; prefix and absent for the GFX9 prefix.
169; GCN-LABEL: {{^}}atomic_load_monotonic_f16_offset:
170; GCN: s_waitcnt
171; GFX9-NOT: s_mov_b32 m0
172; CI-NEXT: s_mov_b32 m0
173; GCN-NEXT: ds_read_u16 v0, v0 offset:32{{$}}
174; GCN-NEXT: s_waitcnt lgkmcnt(0)
175; GCN-NEXT: s_setpc_b64
176define i16 @atomic_load_monotonic_f16_offset(ptr addrspace(3) %ptr) {
177  %gep = getelementptr inbounds half, ptr addrspace(3) %ptr, i32 16
178  %load = load atomic half, ptr addrspace(3) %gep monotonic, align 2
179  %ret = bitcast half %load to i16
180  ret i16 %ret
181}
182
; Monotonic atomic bfloat load (align 2) straight from the addrspace(3)
; argument; the loaded value is bitcast to i16 and returned as an integer.
; The checks expect the same sequence as the i16 and half cases: ds_read_u16
; with no offset operand, with the m0 write present for the CI prefix and
; absent for the GFX9 prefix.
183; GCN-LABEL: {{^}}atomic_load_monotonic_bf16:
184; GCN: s_waitcnt
185; GFX9-NOT: s_mov_b32 m0
186; CI-NEXT: s_mov_b32 m0
187; GCN-NEXT: ds_read_u16 v0, v0{{$}}
188; GCN-NEXT: s_waitcnt lgkmcnt(0)
189; GCN-NEXT: s_setpc_b64
190define i16 @atomic_load_monotonic_bf16(ptr addrspace(3) %ptr) {
191  %load = load atomic bfloat, ptr addrspace(3) %ptr monotonic, align 2
192  %ret = bitcast bfloat %load to i16
193  ret i16 %ret
194}
195
; Monotonic atomic bfloat load from the addrspace(3) argument advanced by a
; constant GEP of 16 bfloat elements; the loaded value is bitcast to i16 and
; returned as an integer. The checks expect ds_read_u16 with immediate
; offset:32 (16 elements * 2 bytes), with the m0 write present for the CI
; prefix and absent for the GFX9 prefix.
196; GCN-LABEL: {{^}}atomic_load_monotonic_bf16_offset:
197; GCN: s_waitcnt
198; GFX9-NOT: s_mov_b32 m0
199; CI-NEXT: s_mov_b32 m0
200; GCN-NEXT: ds_read_u16 v0, v0 offset:32{{$}}
201; GCN-NEXT: s_waitcnt lgkmcnt(0)
202; GCN-NEXT: s_setpc_b64
203define i16 @atomic_load_monotonic_bf16_offset(ptr addrspace(3) %ptr) {
204  %gep = getelementptr inbounds bfloat, ptr addrspace(3) %ptr, i32 16
205  %load = load atomic bfloat, ptr addrspace(3) %gep monotonic, align 2
206  %ret = bitcast bfloat %load to i16
207  ret i16 %ret
208}
209