; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s
; Ports of most of test/CodeGen/NVPTX/access-non-generic.ll

@scalar = internal addrspace(3) global float 0.0, align 4
@array = internal addrspace(3) global [10 x float] zeroinitializer, align 4

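; Flat accesses built from addrspacecasts of the LDS globals @scalar and
; @array, whether the cast is a separate instruction or folded into a constant
; expression, should be rewritten to use ptr addrspace(3) directly.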
define amdgpu_kernel void @load_store_lds_f32(i32 %i, float %v) #0 {
; CHECK-LABEL: define amdgpu_kernel void @load_store_lds_f32(
; CHECK-SAME: i32 [[I:%.*]], float [[V:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[TMP:%.*]] = load float, ptr addrspace(3) @scalar, align 4
; CHECK-NEXT:    call void @use(float [[TMP]])
; CHECK-NEXT:    store float [[V]], ptr addrspace(3) @scalar, align 4
; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr addrspace(3) @scalar, align 4
; CHECK-NEXT:    call void @use(float [[TMP2]])
; CHECK-NEXT:    store float [[V]], ptr addrspace(3) @scalar, align 4
; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr addrspace(3) getelementptr inbounds ([10 x float], ptr addrspace(3) @array, i32 0, i32 5), align 4
; CHECK-NEXT:    call void @use(float [[TMP3]])
; CHECK-NEXT:    store float [[V]], ptr addrspace(3) getelementptr inbounds ([10 x float], ptr addrspace(3) @array, i32 0, i32 5), align 4
; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [10 x float], ptr addrspace(3) @array, i32 0, i32 5
; CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr addrspace(3) [[TMP4]], align 4
; CHECK-NEXT:    call void @use(float [[TMP5]])
; CHECK-NEXT:    store float [[V]], ptr addrspace(3) [[TMP4]], align 4
; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [10 x float], ptr addrspace(3) @array, i32 0, i32 [[I]]
; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr addrspace(3) [[TMP7]], align 4
; CHECK-NEXT:    call void @use(float [[TMP8]])
; CHECK-NEXT:    store float [[V]], ptr addrspace(3) [[TMP7]], align 4
; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load float, ptr addrspacecast (ptr addrspace(3) @scalar to ptr), align 4
  call void @use(float %tmp)
  store float %v, ptr addrspacecast (ptr addrspace(3) @scalar to ptr), align 4
  call void @llvm.amdgcn.s.barrier()
  %tmp1 = addrspacecast ptr addrspace(3) @scalar to ptr
  %tmp2 = load float, ptr %tmp1, align 4
  call void @use(float %tmp2)
  store float %v, ptr %tmp1, align 4
  call void @llvm.amdgcn.s.barrier()
  %tmp3 = load float, ptr getelementptr inbounds ([10 x float], ptr addrspacecast (ptr addrspace(3) @array to ptr), i32 0, i32 5), align 4
  call void @use(float %tmp3)
  store float %v, ptr getelementptr inbounds ([10 x float], ptr addrspacecast (ptr addrspace(3) @array to ptr), i32 0, i32 5), align 4
  call void @llvm.amdgcn.s.barrier()
  %tmp4 = getelementptr inbounds [10 x float], ptr addrspacecast (ptr addrspace(3) @array to ptr), i32 0, i32 5
  %tmp5 = load float, ptr %tmp4, align 4
  call void @use(float %tmp5)
  store float %v, ptr %tmp4, align 4
  call void @llvm.amdgcn.s.barrier()
  %tmp6 = addrspacecast ptr addrspace(3) @array to ptr
  %tmp7 = getelementptr inbounds [10 x float], ptr %tmp6, i32 0, i32 %i
  %tmp8 = load float, ptr %tmp7, align 4
  call void @use(float %tmp8)
  store float %v, ptr %tmp7, align 4
  call void @llvm.amdgcn.s.barrier()
  ret void
}

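; A constant-expression cast of the float LDS global used by an i32 load
; should also collapse to a direct addrspace(3) load.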
define i32 @constexpr_load_int_from_float_lds() #0 {
; CHECK-LABEL: define i32 @constexpr_load_int_from_float_lds(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[TMP:%.*]] = load i32, ptr addrspace(3) @scalar, align 4
; CHECK-NEXT:    ret i32 [[TMP]]
;
bb:
  %tmp = load i32, ptr addrspacecast (ptr addrspace(3) @scalar to ptr), align 4
  ret i32 %tmp
}

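; A GEP chain rooted at an addrspacecast of a global (addrspace(1)) pointer
; argument should be rewritten so the final load stays in the global address
; space.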
define i32 @load_int_from_global_float(ptr addrspace(1) %input, i32 %i, i32 %j) #0 {
; CHECK-LABEL: define i32 @load_int_from_global_float(
; CHECK-SAME: ptr addrspace(1) [[INPUT:%.*]], i32 [[I:%.*]], i32 [[J:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr addrspace(1) [[INPUT]], i32 [[I]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, ptr addrspace(1) [[TMP1]], i32 [[J]]
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[TMP2]], align 4
; CHECK-NEXT:    ret i32 [[TMP4]]
;
bb:
  %tmp = addrspacecast ptr addrspace(1) %input to ptr
  %tmp1 = getelementptr float, ptr %tmp, i32 %i
  %tmp2 = getelementptr float, ptr %tmp1, i32 %j
  %tmp4 = load i32, ptr %tmp2
  ret i32 %tmp4
}

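; Constant expressions nested inside one another (a bitcast of a getelementptr
; of an addrspacecast) should fold into a single direct addrspace(3) access.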
define amdgpu_kernel void @nested_const_expr() #0 {
; CHECK-LABEL: define amdgpu_kernel void @nested_const_expr(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:    store i32 1, ptr addrspace(3) getelementptr ([10 x float], ptr addrspace(3) @array, i64 0, i64 1), align 4
; CHECK-NEXT:    ret void
;
  store i32 1, ptr bitcast (ptr getelementptr ([10 x float], ptr addrspacecast (ptr addrspace(3) @array to ptr), i64 0, i64 1) to ptr), align 4

  ret void
}

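; Every use of the cast pointer should be rewritten (replace-all-uses-with),
; leaving no flat pointer behind.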
define amdgpu_kernel void @rauw(ptr addrspace(1) %input) #0 {
; CHECK-LABEL: define amdgpu_kernel void @rauw(
; CHECK-SAME: ptr addrspace(1) [[INPUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr float, ptr addrspace(1) [[INPUT]], i64 10
; CHECK-NEXT:    [[V:%.*]] = load float, ptr addrspace(1) [[ADDR]], align 4
; CHECK-NEXT:    store float [[V]], ptr addrspace(1) [[ADDR]], align 4
; CHECK-NEXT:    ret void
;
bb:
  %generic_input = addrspacecast ptr addrspace(1) %input to ptr
  %addr = getelementptr float, ptr %generic_input, i64 10
  %v = load float, ptr %addr
  store float %v, ptr %addr
  ret void
}

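; A pointer phi over LDS: the phi, the load, and the exit comparison are all
; expected to be rewritten to ptr addrspace(3).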
; FIXME: Should be able to eliminate the cast inside the loop
define amdgpu_kernel void @loop() #0 {
; CHECK-LABEL: define amdgpu_kernel void @loop(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[END:%.*]] = getelementptr float, ptr addrspace(3) @array, i64 10
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[I:%.*]] = phi ptr addrspace(3) [ @array, %[[ENTRY]] ], [ [[I2:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[V:%.*]] = load float, ptr addrspace(3) [[I]], align 4
; CHECK-NEXT:    call void @use(float [[V]])
; CHECK-NEXT:    [[I2]] = getelementptr float, ptr addrspace(3) [[I]], i64 1
; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp eq ptr addrspace(3) [[I2]], [[END]]
; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  %p = addrspacecast ptr addrspace(3) @array to ptr
  %end = getelementptr float, ptr %p, i64 10
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi ptr [ %p, %entry ], [ %i2, %loop ]
  %v = load float, ptr %i
  call void @use(float %v)
  %i2 = getelementptr float, ptr %i, i64 1
  %exit_cond = icmp eq ptr %i2, %end
  br i1 %exit_cond, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void
}

@generic_end = external addrspace(1) global ptr

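; Same loop, but with a flat-pointer bound loaded from memory: the induction
; pointer is still inferred as LDS, with a cast back to the flat address space
; for the exit comparison.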
define amdgpu_kernel void @loop_with_generic_bound() #0 {
; CHECK-LABEL: define amdgpu_kernel void @loop_with_generic_bound(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[END:%.*]] = load ptr, ptr addrspace(1) @generic_end, align 8
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[I:%.*]] = phi ptr addrspace(3) [ @array, %[[ENTRY]] ], [ [[I2:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[V:%.*]] = load float, ptr addrspace(3) [[I]], align 4
; CHECK-NEXT:    call void @use(float [[V]])
; CHECK-NEXT:    [[I2]] = getelementptr float, ptr addrspace(3) [[I]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(3) [[I2]] to ptr
; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp eq ptr [[TMP0]], [[END]]
; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  %p = addrspacecast ptr addrspace(3) @array to ptr
  %end = load ptr, ptr addrspace(1) @generic_end
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi ptr [ %p, %entry ], [ %i2, %loop ]
  %v = load float, ptr %i
  call void @use(float %v)
  %i2 = getelementptr float, ptr %i, i64 1
  %exit_cond = icmp eq ptr %i2, %end
  br i1 %exit_cond, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void
}

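; Pointers the pass cannot infer anything about (inttoptr constants and undef)
; should be left untouched.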
define void @select_bug() #0 {
; CHECK-LABEL: define void @select_bug(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:    [[CMP:%.*]] = icmp ne ptr inttoptr (i64 4873 to ptr), null
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i64 73, i64 93
; CHECK-NEXT:    [[ADD_PTR157:%.*]] = getelementptr inbounds i64, ptr undef, i64 [[SEL]]
; CHECK-NEXT:    [[CMP169:%.*]] = icmp uge ptr undef, [[ADD_PTR157]]
; CHECK-NEXT:    unreachable
;
  %cmp = icmp ne ptr inttoptr (i64 4873 to ptr), null
  %sel = select i1 %cmp, i64 73, i64 93
  %add.ptr157 = getelementptr inbounds i64, ptr undef, i64 %sel
  %cmp169 = icmp uge ptr undef, %add.ptr157
  unreachable
}

declare void @llvm.amdgcn.s.barrier() #1
declare void @use(float) #0

attributes #0 = { nounwind }
attributes #1 = { convergent nounwind }