xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll (revision db5bcb24c20e9ad5558ba7bb4f90c3a6000665f1)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=GCN,GFX7 %s
3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5
; Monotonic atomic i8 load (align 1) through a generic `ptr`, returned as i8.
; All three llc invocations share the GCN checks, which expect a single
; flat_load_ubyte carrying the glc modifier, followed by a wait and the return.
6define i8 @atomic_load_flat_monotonic_i8(ptr %ptr) {
7; GCN-LABEL: atomic_load_flat_monotonic_i8:
8; GCN:       ; %bb.0:
9; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10; GCN-NEXT:    flat_load_ubyte v0, v[0:1] glc
11; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
12; GCN-NEXT:    s_setpc_b64 s[30:31]
13  %load = load atomic i8, ptr %ptr monotonic, align 1
14  ret i8 %load
15}
16
; Monotonic atomic i8 load whose result is zero-extended to i32.
; The shared checks expect only flat_load_ubyte (with glc); no separate
; extension instruction appears between the load and the return.
17define i32 @atomic_load_flat_monotonic_i8_zext_to_i32(ptr %ptr) {
18; GCN-LABEL: atomic_load_flat_monotonic_i8_zext_to_i32:
19; GCN:       ; %bb.0:
20; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21; GCN-NEXT:    flat_load_ubyte v0, v[0:1] glc
22; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
23; GCN-NEXT:    s_setpc_b64 s[30:31]
24  %load = load atomic i8, ptr %ptr monotonic, align 1
25  %ext = zext i8 %load to i32
26  ret i32 %ext
27}
28
; Monotonic atomic i8 load whose result is sign-extended to i32.
; The shared checks expect the signed byte load flat_load_sbyte (with glc)
; and no separate sign-extension instruction.
29define i32 @atomic_load_flat_monotonic_i8_sext_to_i32(ptr %ptr) {
30; GCN-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
31; GCN:       ; %bb.0:
32; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33; GCN-NEXT:    flat_load_sbyte v0, v[0:1] glc
34; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
35; GCN-NEXT:    s_setpc_b64 s[30:31]
36  %load = load atomic i8, ptr %ptr monotonic, align 1
37  %ext = sext i8 %load to i32
38  ret i32 %ext
39}
40
; Monotonic atomic i8 load whose result is zero-extended to i16.
; The expected output matches the zext-to-i32 case: flat_load_ubyte (with glc)
; and nothing else before the return.
41define i16 @atomic_load_flat_monotonic_i8_zext_to_i16(ptr %ptr) {
42; GCN-LABEL: atomic_load_flat_monotonic_i8_zext_to_i16:
43; GCN:       ; %bb.0:
44; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45; GCN-NEXT:    flat_load_ubyte v0, v[0:1] glc
46; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
47; GCN-NEXT:    s_setpc_b64 s[30:31]
48  %load = load atomic i8, ptr %ptr monotonic, align 1
49  %ext = zext i8 %load to i16
50  ret i16 %ext
51}
52
; Monotonic atomic i8 load whose result is sign-extended to i16.
; The shared checks expect flat_load_sbyte (with glc) and no separate
; sign-extension instruction.
53define i16 @atomic_load_flat_monotonic_i8_sext_to_i16(ptr %ptr) {
54; GCN-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16:
55; GCN:       ; %bb.0:
56; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
57; GCN-NEXT:    flat_load_sbyte v0, v[0:1] glc
58; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
59; GCN-NEXT:    s_setpc_b64 s[30:31]
60  %load = load atomic i8, ptr %ptr monotonic, align 1
61  %ext = sext i8 %load to i16
62  ret i16 %ext
63}
64
; Monotonic atomic i16 load (align 2) through a generic `ptr`, returned as i16.
; The shared checks expect a single flat_load_ushort carrying the glc modifier.
65define i16 @atomic_load_flat_monotonic_i16(ptr %ptr) {
66; GCN-LABEL: atomic_load_flat_monotonic_i16:
67; GCN:       ; %bb.0:
68; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69; GCN-NEXT:    flat_load_ushort v0, v[0:1] glc
70; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
71; GCN-NEXT:    s_setpc_b64 s[30:31]
72  %load = load atomic i16, ptr %ptr monotonic, align 2
73  ret i16 %load
74}
75
; Monotonic atomic i16 load whose result is zero-extended to i32.
; The shared checks expect only flat_load_ushort (with glc); no separate
; extension instruction appears.
76define i32 @atomic_load_flat_monotonic_i16_zext_to_i32(ptr %ptr) {
77; GCN-LABEL: atomic_load_flat_monotonic_i16_zext_to_i32:
78; GCN:       ; %bb.0:
79; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
80; GCN-NEXT:    flat_load_ushort v0, v[0:1] glc
81; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
82; GCN-NEXT:    s_setpc_b64 s[30:31]
83  %load = load atomic i16, ptr %ptr monotonic, align 2
84  %ext = zext i16 %load to i32
85  ret i32 %ext
86}
87
; Monotonic atomic i16 load whose result is sign-extended to i32.
; The shared checks expect the signed short load flat_load_sshort (with glc)
; and no separate sign-extension instruction.
88define i32 @atomic_load_flat_monotonic_i16_sext_to_i32(ptr %ptr) {
89; GCN-LABEL: atomic_load_flat_monotonic_i16_sext_to_i32:
90; GCN:       ; %bb.0:
91; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
92; GCN-NEXT:    flat_load_sshort v0, v[0:1] glc
93; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
94; GCN-NEXT:    s_setpc_b64 s[30:31]
95  %load = load atomic i16, ptr %ptr monotonic, align 2
96  %ext = sext i16 %load to i32
97  ret i32 %ext
98}
99
; Monotonic atomic load of a `half` value (align 2), returned unchanged.
; The expected output is the same as for the i16 load: flat_load_ushort with glc.
100define half @atomic_load_flat_monotonic_f16(ptr %ptr) {
101; GCN-LABEL: atomic_load_flat_monotonic_f16:
102; GCN:       ; %bb.0:
103; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104; GCN-NEXT:    flat_load_ushort v0, v[0:1] glc
105; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
106; GCN-NEXT:    s_setpc_b64 s[30:31]
107  %load = load atomic half, ptr %ptr monotonic, align 2
108  ret half %load
109}
110
; Monotonic atomic load of a `bfloat` value (align 2), returned unchanged.
; The expected output is the same as for the i16 load: flat_load_ushort with glc.
111define bfloat @atomic_load_flat_monotonic_bf16(ptr %ptr) {
112; GCN-LABEL: atomic_load_flat_monotonic_bf16:
113; GCN:       ; %bb.0:
114; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
115; GCN-NEXT:    flat_load_ushort v0, v[0:1] glc
116; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
117; GCN-NEXT:    s_setpc_b64 s[30:31]
118  %load = load atomic bfloat, ptr %ptr monotonic, align 2
119  ret bfloat %load
120}
121
; Monotonic atomic `half` load, bitcast to i16 and then zero-extended to i32.
; The shared checks expect only flat_load_ushort (with glc); neither the
; bitcast nor the zext produces an instruction in the expected output.
122define i32 @atomic_load_flat_monotonic_f16_zext_to_i32(ptr %ptr) {
123; GCN-LABEL: atomic_load_flat_monotonic_f16_zext_to_i32:
124; GCN:       ; %bb.0:
125; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126; GCN-NEXT:    flat_load_ushort v0, v[0:1] glc
127; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
128; GCN-NEXT:    s_setpc_b64 s[30:31]
129  %load = load atomic half, ptr %ptr monotonic, align 2
130  %cast = bitcast half %load to i16
131  %ext = zext i16 %cast to i32
132  ret i32 %ext
133}
134
; Monotonic atomic `bfloat` load, bitcast to i16 and then zero-extended to i32.
; The shared checks expect only flat_load_ushort (with glc); neither the
; bitcast nor the zext produces an instruction in the expected output.
135define i32 @atomic_load_flat_monotonic_bf16_zext_to_i32(ptr %ptr) {
136; GCN-LABEL: atomic_load_flat_monotonic_bf16_zext_to_i32:
137; GCN:       ; %bb.0:
138; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
139; GCN-NEXT:    flat_load_ushort v0, v[0:1] glc
140; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
141; GCN-NEXT:    s_setpc_b64 s[30:31]
142  %load = load atomic bfloat, ptr %ptr monotonic, align 2
143  %cast = bitcast bfloat %load to i16
144  %ext = zext i16 %cast to i32
145  ret i32 %ext
146}
147
; Monotonic atomic i16 load, zero-extended to i32 and shifted left by 16 so
; the loaded value lands in the upper half of the result.
; The shared checks expect a plain flat_load_ushort (with glc) followed by an
; explicit v_lshlrev_b32 by 16 after the wait.
; NOTE(review): the name mentions d16_hi, but no d16 form of the load appears
; in the expected output for any target - presumably this records current
; behavior; confirm whether that is intended.
148define i32 @atomic_load_flat_monotonic_i16_d16_hi_shift(ptr %ptr) {
149; GCN-LABEL: atomic_load_flat_monotonic_i16_d16_hi_shift:
150; GCN:       ; %bb.0:
151; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152; GCN-NEXT:    flat_load_ushort v0, v[0:1] glc
153; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
154; GCN-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
155; GCN-NEXT:    s_setpc_b64 s[30:31]
156  %load = load atomic i16, ptr %ptr monotonic, align 2
157  %ext = zext i16 %load to i32
158  %shl = shl i32 %ext, 16
159  ret i32 %shl
160}
161
; Monotonic atomic i16 load inserted into element 1 of the <2 x i16> argument.
; Expected output differs per target, so each has its own check prefix:
;  - GFX7 reads v2 and v3, combines them, keeps the low 16 bits, ORs in the
;    loaded value shifted left by 16, then writes v1 as the result shifted
;    right by 16.
;  - GFX8 shifts the loaded value left by 16 and merges it with the WORD_0
;    half of v2 using v_or_b32_sdwa.
;  - GFX9 shifts the loaded value left by 16 and merges it using v_and_or_b32
;    with a 0xffff mask held in v1.
; Every target uses flat_load_ushort with glc for the load itself.
162define <2 x i16> @atomic_load_flat_monotonic_i16_d16_hi_vector_insert(ptr %ptr, <2 x i16> %vec) {
163; GFX7-LABEL: atomic_load_flat_monotonic_i16_d16_hi_vector_insert:
164; GFX7:       ; %bb.0:
165; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166; GFX7-NEXT:    flat_load_ushort v0, v[0:1] glc
167; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
168; GFX7-NEXT:    v_and_b32_e32 v2, 0xffff, v2
169; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
170; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff, v1
171; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
172; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
173; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
174; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
175; GFX7-NEXT:    s_setpc_b64 s[30:31]
176;
177; GFX8-LABEL: atomic_load_flat_monotonic_i16_d16_hi_vector_insert:
178; GFX8:       ; %bb.0:
179; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180; GFX8-NEXT:    flat_load_ushort v0, v[0:1] glc
181; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
182; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
183; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
184; GFX8-NEXT:    s_setpc_b64 s[30:31]
185;
186; GFX9-LABEL: atomic_load_flat_monotonic_i16_d16_hi_vector_insert:
187; GFX9:       ; %bb.0:
188; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
189; GFX9-NEXT:    flat_load_ushort v0, v[0:1] glc
190; GFX9-NEXT:    v_mov_b32_e32 v1, 0xffff
191; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
192; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
193; GFX9-NEXT:    v_and_or_b32 v0, v2, v1, v0
194; GFX9-NEXT:    s_setpc_b64 s[30:31]
195  %load = load atomic i16, ptr %ptr monotonic, align 2
196  %insert = insertelement <2 x i16> %vec, i16 %load, i32 1
197  ret <2 x i16> %insert
198}
199
; Monotonic atomic i16 load placed in the low half of an i32 whose high half
; comes from the i16 argument %high: result = (zext(%high) << 16) | zext(load).
; Expected output per target:
;  - GFX7 and GFX8 mask v2 to 16 bits, shift it left by 16, and OR it with the
;    loaded value after the wait.
;  - GFX9 masks v2 to 16 bits and folds the shift and OR into v_lshl_or_b32.
; Every target uses flat_load_ushort with glc for the load itself.
200define i32 @atomic_load_flat_monotonic_i16_d16_lo_or(ptr %ptr, i16 %high) {
201; GFX7-LABEL: atomic_load_flat_monotonic_i16_d16_lo_or:
202; GFX7:       ; %bb.0:
203; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
204; GFX7-NEXT:    flat_load_ushort v0, v[0:1] glc
205; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff, v2
206; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
207; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
208; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
209; GFX7-NEXT:    s_setpc_b64 s[30:31]
210;
211; GFX8-LABEL: atomic_load_flat_monotonic_i16_d16_lo_or:
212; GFX8:       ; %bb.0:
213; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
214; GFX8-NEXT:    flat_load_ushort v0, v[0:1] glc
215; GFX8-NEXT:    v_and_b32_e32 v1, 0xffff, v2
216; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
217; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
218; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
219; GFX8-NEXT:    s_setpc_b64 s[30:31]
220;
221; GFX9-LABEL: atomic_load_flat_monotonic_i16_d16_lo_or:
222; GFX9:       ; %bb.0:
223; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
224; GFX9-NEXT:    flat_load_ushort v0, v[0:1] glc
225; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v2
226; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
227; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
228; GFX9-NEXT:    s_setpc_b64 s[30:31]
229  %load = load atomic i16, ptr %ptr monotonic, align 2
230  %ext = zext i16 %load to i32
231  %high.ext = zext i16 %high to i32
232  %shl = shl i32 %high.ext, 16
233  %or = or i32 %shl, %ext
234  ret i32 %or
235}
236
; Monotonic atomic i16 load inserted into element 0 of the <2 x i16> argument.
; Expected output per target:
;  - GFX7 reads v2 and v3, combines them, keeps the upper 16 bits with a
;    0xffff0000 mask, ORs in the loaded value, then writes v1 as the result
;    shifted right by 16.
;  - GFX8 masks v2 with 0xffff0000 and ORs in the loaded value.
;  - GFX9 does the mask and OR in one v_and_or_b32, with the 0xffff0000 mask
;    held in v1.
; Every target uses flat_load_ushort with glc for the load itself.
237define <2 x i16> @atomic_load_flat_monotonic_i16_d16_lo_vector_insert(ptr %ptr, <2 x i16> %vec) {
238; GFX7-LABEL: atomic_load_flat_monotonic_i16_d16_lo_vector_insert:
239; GFX7:       ; %bb.0:
240; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
241; GFX7-NEXT:    flat_load_ushort v0, v[0:1] glc
242; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
243; GFX7-NEXT:    v_and_b32_e32 v2, 0xffff, v2
244; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
245; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff0000, v1
246; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
247; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
248; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
249; GFX7-NEXT:    s_setpc_b64 s[30:31]
250;
251; GFX8-LABEL: atomic_load_flat_monotonic_i16_d16_lo_vector_insert:
252; GFX8:       ; %bb.0:
253; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
254; GFX8-NEXT:    flat_load_ushort v0, v[0:1] glc
255; GFX8-NEXT:    v_and_b32_e32 v1, 0xffff0000, v2
256; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
257; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
258; GFX8-NEXT:    s_setpc_b64 s[30:31]
259;
260; GFX9-LABEL: atomic_load_flat_monotonic_i16_d16_lo_vector_insert:
261; GFX9:       ; %bb.0:
262; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
263; GFX9-NEXT:    flat_load_ushort v0, v[0:1] glc
264; GFX9-NEXT:    v_mov_b32_e32 v1, 0xffff0000
265; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
266; GFX9-NEXT:    v_and_or_b32 v0, v2, v1, v0
267; GFX9-NEXT:    s_setpc_b64 s[30:31]
268  %load = load atomic i16, ptr %ptr monotonic, align 2
269  %insert = insertelement <2 x i16> %vec, i16 %load, i32 0
270  ret <2 x i16> %insert
271}
272