; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare -amdgpu-codegenprepare-widen-constant-loads < %s | FileCheck -check-prefix=OPT %s

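; The -amdgpu-codegenprepare-widen-constant-loads option makes
; AMDGPUCodeGenPrepare rewrite sub-32-bit scalar loads from the constant
; address space (4) as an i32 load followed by a trunc, provided the load is
; at least 4-byte aligned. Under-aligned, volatile, and
; non-constant-address-space loads must be left untouched; the tests below
; cover each case.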
declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0

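; An i1 load is widened only when it is at least 4-byte aligned.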
define amdgpu_kernel void @constant_load_i1(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i1(
; OPT-NEXT:    [[VAL:%.*]] = load i1, ptr addrspace(4) [[IN:%.*]], align 1
; OPT-NEXT:    store i1 [[VAL]], ptr addrspace(1) [[OUT:%.*]], align 1
; OPT-NEXT:    ret void
;
  %val = load i1, ptr addrspace(4) %in
  store i1 %val, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_i1_align2(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i1_align2(
; OPT-NEXT:    [[VAL:%.*]] = load i1, ptr addrspace(4) [[IN:%.*]], align 2
; OPT-NEXT:    store i1 [[VAL]], ptr addrspace(1) [[OUT:%.*]], align 2
; OPT-NEXT:    ret void
;
  %val = load i1, ptr addrspace(4) %in, align 2
  store i1 %val, ptr addrspace(1) %out, align 2
  ret void
}

define amdgpu_kernel void @constant_load_i1_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i1_align4(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i1
; OPT-NEXT:    store i1 [[TMP3]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %val = load i1, ptr addrspace(4) %in, align 4
  store i1 %val, ptr addrspace(1) %out, align 4
  ret void
}

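; Likewise for i8: only the explicitly 4-byte-aligned load is widened.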
define amdgpu_kernel void @constant_load_i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i8(
; OPT-NEXT:    [[VAL:%.*]] = load i8, ptr addrspace(4) [[IN:%.*]], align 1
; OPT-NEXT:    store i8 [[VAL]], ptr addrspace(1) [[OUT:%.*]], align 1
; OPT-NEXT:    ret void
;
  %val = load i8, ptr addrspace(4) %in
  store i8 %val, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_i8_align2(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i8_align2(
; OPT-NEXT:    [[VAL:%.*]] = load i8, ptr addrspace(4) [[IN:%.*]], align 2
; OPT-NEXT:    store i8 [[VAL]], ptr addrspace(1) [[OUT:%.*]], align 2
; OPT-NEXT:    ret void
;
  %val = load i8, ptr addrspace(4) %in, align 2
  store i8 %val, ptr addrspace(1) %out, align 2
  ret void
}

define amdgpu_kernel void @constant_load_i8align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i8align4(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
; OPT-NEXT:    store i8 [[TMP3]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %val = load i8, ptr addrspace(4) %in, align 4
  store i8 %val, ptr addrspace(1) %out, align 4
  ret void
}

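; Small vector loads widen to an i32 load, a trunc to the matching integer
; width (i16 for <2 x i8>, i24 for <3 x i8>), and a bitcast back to the
; vector type. <3 x i8> has an ABI alignment of 4, so it is widened even
; without an explicit align.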
define amdgpu_kernel void @constant_load_v2i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_v2i8(
; OPT-NEXT:    [[LD:%.*]] = load <2 x i8>, ptr addrspace(4) [[IN:%.*]], align 2
; OPT-NEXT:    store <2 x i8> [[LD]], ptr addrspace(1) [[OUT:%.*]], align 2
; OPT-NEXT:    ret void
;
  %ld = load <2 x i8>, ptr addrspace(4) %in
  store <2 x i8> %ld, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_v2i8_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_v2i8_align4(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
; OPT-NEXT:    [[TMP4:%.*]] = bitcast i16 [[TMP3]] to <2 x i8>
; OPT-NEXT:    store <2 x i8> [[TMP4]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load <2 x i8>, ptr addrspace(4) %in, align 4
  store <2 x i8> %ld, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @constant_load_v3i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_v3i8(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i24
; OPT-NEXT:    [[TMP4:%.*]] = bitcast i24 [[TMP3]] to <3 x i8>
; OPT-NEXT:    store <3 x i8> [[TMP4]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load <3 x i8>, ptr addrspace(4) %in
  store <3 x i8> %ld, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_v3i8_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_v3i8_align4(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i24
; OPT-NEXT:    [[TMP4:%.*]] = bitcast i24 [[TMP3]] to <3 x i8>
; OPT-NEXT:    store <3 x i8> [[TMP4]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load <3 x i8>, ptr addrspace(4) %in, align 4
  store <3 x i8> %ld, ptr addrspace(1) %out, align 4
  ret void
}

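; i16 widens only at align 4; the original sext is then fed by the trunc of
; the widened load. half is only 2-byte aligned here and <2 x half> already
; occupies a full dword, so neither is changed.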
define amdgpu_kernel void @constant_load_i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i16(
; OPT-NEXT:    [[LD:%.*]] = load i16, ptr addrspace(4) [[IN:%.*]], align 2
; OPT-NEXT:    [[EXT:%.*]] = sext i16 [[LD]] to i32
; OPT-NEXT:    store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load i16, ptr addrspace(4) %in
  %ext = sext i16 %ld to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_i16_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i16_align4(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
; OPT-NEXT:    [[EXT:%.*]] = sext i16 [[TMP3]] to i32
; OPT-NEXT:    store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load i16, ptr addrspace(4) %in, align 4
  %ext = sext i16 %ld to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @constant_load_f16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_f16(
; OPT-NEXT:    [[LD:%.*]] = load half, ptr addrspace(4) [[IN:%.*]], align 2
; OPT-NEXT:    store half [[LD]], ptr addrspace(1) [[OUT:%.*]], align 2
; OPT-NEXT:    ret void
;
  %ld = load half, ptr addrspace(4) %in
  store half %ld, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_v2f16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_v2f16(
; OPT-NEXT:    [[LD:%.*]] = load <2 x half>, ptr addrspace(4) [[IN:%.*]], align 4
; OPT-NEXT:    store <2 x half> [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load <2 x half>, ptr addrspace(4) %in
  store <2 x half> %ld, ptr addrspace(1) %out
  ret void
}

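; Volatile loads must never be widened.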
define amdgpu_kernel void @load_volatile(ptr addrspace(1) %out, ptr addrspace(4) %in) {
; OPT-LABEL: @load_volatile(
; OPT-NEXT:    [[A:%.*]] = load volatile i16, ptr addrspace(4) [[IN:%.*]], align 2
; OPT-NEXT:    store i16 [[A]], ptr addrspace(1) [[OUT:%.*]], align 2
; OPT-NEXT:    ret void
;
  %a = load volatile i16, ptr addrspace(4) %in
  store i16 %a, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_v2i8_volatile(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_v2i8_volatile(
; OPT-NEXT:    [[LD:%.*]] = load volatile <2 x i8>, ptr addrspace(4) [[IN:%.*]], align 2
; OPT-NEXT:    store <2 x i8> [[LD]], ptr addrspace(1) [[OUT:%.*]], align 2
; OPT-NEXT:    ret void
;
  %ld = load volatile <2 x i8>, ptr addrspace(4) %in
  store <2 x i8> %ld, ptr addrspace(1) %out
  ret void
}

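; Only constant-address-space pointers qualify; a global (addrspace 1) load
; is left alone.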
define amdgpu_kernel void @constant_load_v2i8_addrspace1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; OPT-LABEL: @constant_load_v2i8_addrspace1(
; OPT-NEXT:    [[LD:%.*]] = load <2 x i8>, ptr addrspace(1) [[IN:%.*]], align 2
; OPT-NEXT:    store <2 x i8> [[LD]], ptr addrspace(1) [[OUT:%.*]], align 2
; OPT-NEXT:    ret void
;
  %ld = load <2 x i8>, ptr addrspace(1) %in
  store <2 x i8> %ld, ptr addrspace(1) %out
  ret void
}

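; The dispatch pointer returned by the intrinsic is a constant-address-space
; pointer, so the align 4 i8 load through it is widened like any other.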
define amdgpu_kernel void @use_dispatch_ptr(ptr addrspace(1) %ptr) #1 {
; OPT-LABEL: @use_dispatch_ptr(
; OPT-NEXT:    [[DISPATCH_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[DISPATCH_PTR]], align 4
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
; OPT-NEXT:    [[LD:%.*]] = zext i8 [[TMP3]] to i32
; OPT-NEXT:    store i32 [[LD]], ptr addrspace(1) [[PTR:%.*]], align 4
; OPT-NEXT:    ret void
;
  %dispatch.ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
  %val = load i8, ptr addrspace(4) %dispatch.ptr, align 4
  %ld = zext i8 %val to i32
  store i32 %ld, ptr addrspace(1) %ptr
  ret void
}

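; !range metadata cannot be copied verbatim onto the widened i32 load; the
; pass rewrites it (note the RNG captures below and the rewritten metadata
; checked at the end of the file), and drops it entirely when nothing useful
; survives, as in @constant_load_i16_align4_range_from_0.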
define amdgpu_kernel void @constant_load_i16_align4_range(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i16_align4_range(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG0:![0-9]+]]
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
; OPT-NEXT:    [[EXT:%.*]] = sext i16 [[TMP3]] to i32
; OPT-NEXT:    store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load i16, ptr addrspace(4) %in, align 4, !range !0
  %ext = sext i16 %ld to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_i16_align4_range_max(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i16_align4_range_max(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG0]]
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
; OPT-NEXT:    [[EXT:%.*]] = sext i16 [[TMP3]] to i32
; OPT-NEXT:    store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load i16, ptr addrspace(4) %in, align 4, !range !1
  %ext = sext i16 %ld to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_i16_align4_complex_range(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i16_align4_complex_range(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG1:![0-9]+]]
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
; OPT-NEXT:    [[EXT:%.*]] = sext i16 [[TMP3]] to i32
; OPT-NEXT:    store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load i16, ptr addrspace(4) %in, align 4, !range !2
  %ext = sext i16 %ld to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_i16_align4_range_from_0(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i16_align4_range_from_0(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
; OPT-NEXT:    [[EXT:%.*]] = sext i16 [[TMP3]] to i32
; OPT-NEXT:    store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load i16, ptr addrspace(4) %in, align 4, !range !3
  %ext = sext i16 %ld to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_i16_align4_range_from_neg(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i16_align4_range_from_neg(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG2:![0-9]+]]
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
; OPT-NEXT:    [[EXT:%.*]] = sext i16 [[TMP3]] to i32
; OPT-NEXT:    store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load i16, ptr addrspace(4) %in, align 4, !range !4
  %ext = sext i16 %ld to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @constant_load_i16_align4_range_from_neg_to_0(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i16_align4_range_from_neg_to_0(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG2]]
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
; OPT-NEXT:    [[EXT:%.*]] = sext i16 [[TMP3]] to i32
; OPT-NEXT:    store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load i16, ptr addrspace(4) %in, align 4, !range !5
  %ext = sext i16 %ld to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

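; !invariant.load metadata, by contrast, is carried over unchanged.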
define amdgpu_kernel void @constant_load_i16_align4_invariant(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; OPT-LABEL: @constant_load_i16_align4_invariant(
; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !invariant.load !3
; OPT-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
; OPT-NEXT:    [[EXT:%.*]] = sext i16 [[TMP3]] to i32
; OPT-NEXT:    store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4
; OPT-NEXT:    ret void
;
  %ld = load i16, ptr addrspace(4) %in, align 4, !invariant.load !6
  %ext = sext i16 %ld to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind }

; OPT: !0 = !{i32 5, i32 0}
; OPT: !1 = !{i32 8, i32 0}
; OPT: !2 = !{i32 65520, i32 0}
; OPT: !3 = !{}

!0 = !{i16 5, i16 500}
!1 = !{i16 5, i16 -1}
!2 = !{i16 8, i16 12, i16 42, i16 99}
!3 = !{i16 0, i16 255}
!4 = !{i16 -16, i16 16}
!5 = !{i16 -16, i16 0}
!6 = !{}