xref: /llvm-project/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-control-flow.ll (revision 6540f1635a6566aef93d9b4f568ac648474c39e7)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2; RUN: opt -S -mcpu=gfx900 -amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s
3; RUN: opt -S -mcpu=gfx900 -passes=amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s
4
5target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
6target triple = "amdgcn--"
7
8;; This should optimize to just the offset part
;; Sums floats read through a buffer fat pointer (addrspace 7) whose only
;; update inside the loop is a getelementptr. The expected output below loads
;; through the original %buf resource on every iteration and carries a single
;; i32 offset phi around the loop; no resource phi is expected.
9define float @sum(ptr addrspace(8) %buf, i32 %len) {
10; CHECK-LABEL: define float @sum
11; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0:[0-9]+]] {
12; CHECK-NEXT:  entry:
13; CHECK-NEXT:    br label [[LOOP:%.*]]
14; CHECK:       loop:
15; CHECK-NEXT:    [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
16; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
17; CHECK-NEXT:    [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
18; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
19; CHECK-NEXT:    [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
20; CHECK-NEXT:    [[PTR]] = add i32 [[PTR_PREV_OFF]], 4
21; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 1
22; CHECK-NEXT:    [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
23; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
24; CHECK:       exit:
25; CHECK-NEXT:    ret float [[SUM]]
26;
27entry:
  ; The fat pointer is created here, directly from the addrspace(8) argument.
28  %start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
29  br label %loop
30loop:
31  %sum.prev = phi float [ %sum, %loop ], [ 0.0, %entry ]
32  %ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ]
33  %i = phi i32 [ %i.next, %loop ], [ 0, %entry ]
34
35  %val = load float, ptr addrspace(7) %ptr.prev
36  %sum = fadd float %sum.prev, %val
37
  ; Step by one float; the expected output turns this into an i32 add of 4.
38  %ptr = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1
39  %i.next = add i32 %i, 1
40  %test = icmp ult i32 %i.next, %len
41  br i1 %test, label %loop, label %exit
42exit:
43  ret float %sum
44}
45
46;; But this should not
;; Same loop as @sum, but the pointer is advanced with ptrtoint / add i160 /
;; inttoptr instead of a getelementptr. The expected output carries both a
;; resource phi and an offset phi, packs them into an i160 before the add and
;; splits the sum back into a resource and an i32 offset afterwards.
47define float @sum_integer_ops(ptr addrspace(8) %buf, i32 %len) {
48; CHECK-LABEL: define float @sum_integer_ops
49; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
50; CHECK-NEXT:  entry:
51; CHECK-NEXT:    br label [[LOOP:%.*]]
52; CHECK:       loop:
53; CHECK-NEXT:    [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
54; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
55; CHECK-NEXT:    [[PTR_PREV_RSRC:%.*]] = phi ptr addrspace(8) [ [[PTR_RSRC:%.*]], [[LOOP]] ], [ [[BUF]], [[ENTRY]] ]
56; CHECK-NEXT:    [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
57; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[PTR_PREV_RSRC]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
58; CHECK-NEXT:    [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
59; CHECK-NEXT:    [[PTR_PREV_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[PTR_PREV_RSRC]] to i160
60; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw i160 [[PTR_PREV_INT_RSRC]], 32
61; CHECK-NEXT:    [[PTR_PREV_INT_OFF:%.*]] = zext i32 [[PTR_PREV_OFF]] to i160
62; CHECK-NEXT:    [[PTR_PREV_INT:%.*]] = or i160 [[TMP0]], [[PTR_PREV_INT_OFF]]
63; CHECK-NEXT:    [[PTR_INT:%.*]] = add i160 [[PTR_PREV_INT]], 4
64; CHECK-NEXT:    [[TMP1:%.*]] = lshr i160 [[PTR_INT]], 32
65; CHECK-NEXT:    [[TMP2:%.*]] = trunc i160 [[TMP1]] to i128
66; CHECK-NEXT:    [[PTR_RSRC]] = inttoptr i128 [[TMP2]] to ptr addrspace(8)
67; CHECK-NEXT:    [[PTR_OFF]] = trunc i160 [[PTR_INT]] to i32
68; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 1
69; CHECK-NEXT:    [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
70; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
71; CHECK:       exit:
72; CHECK-NEXT:    ret float [[SUM]]
73;
74entry:
75  %start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
76  br label %loop
77loop:
78  %sum.prev = phi float [ %sum, %loop ], [ 0.0, %entry ]
79  %ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ]
80  %i = phi i32 [ %i.next, %loop ], [ 0, %entry ]
81
82  %val = load float, ptr addrspace(7) %ptr.prev
83  %sum = fadd float %sum.prev, %val
84
  ; Integer round trip on the whole 160-bit fat pointer: this is what makes
  ; the expected output keep a resource phi in addition to the offset phi.
85  %ptr.prev.int = ptrtoint ptr addrspace(7) %ptr.prev to i160
86  %ptr.int = add i160 %ptr.prev.int, 4
87  %ptr = inttoptr i160 %ptr.int to ptr addrspace(7)
88  %i.next = add i32 %i, 1
89  %test = icmp ult i32 %i.next, %len
90  br i1 %test, label %loop, label %exit
91exit:
92  ret float %sum
93}
94
95;; Should go to offsets only
;; Two nested loops, each with its own fat-pointer phi; the inner phi is seeded
;; from the outer one and the outer one is advanced from the inner result. Both
;; pointers are only ever moved with getelementptr, and the expected output
;; has an i32 offset phi per loop with every load going through %buf.
96define float @sum_2d(ptr addrspace(8) %buf, i32 %ii, i32 %jj) {
97; CHECK-LABEL: define float @sum_2d
98; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[II:%.*]], i32 [[JJ:%.*]]) #[[ATTR0]] {
99; CHECK-NEXT:  entry:
100; CHECK-NEXT:    br label [[LOOP1_ENTRY:%.*]]
101; CHECK:       loop1.entry:
102; CHECK-NEXT:    [[SUM1_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP1_EXIT:%.*]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
103; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP1_EXIT]] ], [ 0, [[ENTRY]] ]
104; CHECK-NEXT:    [[PTR1_PREV_OFF:%.*]] = phi i32 [ [[PTR1:%.*]], [[LOOP1_EXIT]] ], [ 0, [[ENTRY]] ]
105; CHECK-NEXT:    br label [[LOOP2:%.*]]
106; CHECK:       loop2:
107; CHECK-NEXT:    [[SUM2_PREV:%.*]] = phi float [ [[SUM]], [[LOOP2]] ], [ [[SUM1_PREV]], [[LOOP1_ENTRY]] ]
108; CHECK-NEXT:    [[J:%.*]] = phi i32 [ [[J_NEXT:%.*]], [[LOOP2]] ], [ 0, [[LOOP1_ENTRY]] ]
109; CHECK-NEXT:    [[PTR2_PREV_OFF:%.*]] = phi i32 [ [[PTR2:%.*]], [[LOOP2]] ], [ [[PTR1_PREV_OFF]], [[LOOP1_ENTRY]] ]
110; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR2_PREV_OFF]], i32 0, i32 0)
111; CHECK-NEXT:    [[SUM]] = fadd float [[SUM2_PREV]], [[VAL]]
112; CHECK-NEXT:    [[PTR2]] = add i32 [[PTR2_PREV_OFF]], 4
113; CHECK-NEXT:    [[J_NEXT]] = add i32 [[J]], 1
114; CHECK-NEXT:    [[TEST2:%.*]] = icmp ult i32 [[J_NEXT]], [[JJ]]
115; CHECK-NEXT:    br i1 [[TEST2]], label [[LOOP2]], label [[LOOP1_EXIT]]
116; CHECK:       loop1.exit:
117; CHECK-NEXT:    [[PTR1]] = add i32 [[PTR2]], 4
118; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 1
119; CHECK-NEXT:    [[TEST1:%.*]] = icmp ult i32 [[I_NEXT]], [[II]]
120; CHECK-NEXT:    br i1 [[TEST1]], label [[LOOP1_ENTRY]], label [[EXIT:%.*]]
121; CHECK:       exit:
122; CHECK-NEXT:    ret float [[SUM]]
123;
124entry:
125  %start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
126  br label %loop1.entry
127loop1.entry:
  ; Outer-loop state: running sum, outer pointer and outer counter.
128  %sum1.prev = phi float [ %sum, %loop1.exit ], [ 0.0, %entry ]
129  %ptr1.prev = phi ptr addrspace(7) [ %ptr1, %loop1.exit ], [ %start, %entry ]
130  %i = phi i32 [ %i.next, %loop1.exit ], [ 0, %entry ]
131
132  br label %loop2
133loop2:
  ; Inner-loop state; the inner pointer starts from the outer pointer.
134  %sum2.prev = phi float [ %sum, %loop2 ], [ %sum1.prev, %loop1.entry ]
135  %ptr2.prev = phi ptr addrspace(7) [ %ptr2, %loop2 ], [ %ptr1.prev, %loop1.entry ]
136  %j = phi i32 [ %j.next, %loop2 ], [ 0, %loop1.entry ]
137
138  %val = load float, ptr addrspace(7) %ptr2.prev
139  %sum = fadd float %sum2.prev, %val
140
141  %ptr2 = getelementptr float, ptr addrspace(7) %ptr2.prev, i32 1
142  %j.next = add i32 %j, 1
143  %test2 = icmp ult i32 %j.next, %jj
144
145  br i1 %test2, label %loop2, label %loop1.exit
146loop1.exit:
  ; The outer pointer continues one float past where the inner loop stopped.
147  %ptr1 = getelementptr float, ptr addrspace(7) %ptr2, i32 1
148  %i.next = add i32 %i, 1
149  %test1 = icmp ult i32 %i.next, %ii
150  br i1 %test1, label %loop1.entry, label %exit
151exit:
152  ret float %sum
153}
154
155;; This should optimize to just the offset parts since all the arguments to the
156;; select point to the same buffer.
;; The next pointer is a select between two getelementptrs of the same
;; loop-carried pointer (one float ahead or two floats ahead). The expected
;; output selects between two i32 offsets and still loads through %buf.
157define float @sum_jump_on_negative(ptr addrspace(8) %buf, i32 %len) {
158; CHECK-LABEL: define float @sum_jump_on_negative
159; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
160; CHECK-NEXT:  entry:
161; CHECK-NEXT:    br label [[LOOP:%.*]]
162; CHECK:       loop:
163; CHECK-NEXT:    [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
164; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
165; CHECK-NEXT:    [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
166; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
167; CHECK-NEXT:    [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
168; CHECK-NEXT:    [[SKIP_NEXT:%.*]] = fcmp olt float [[VAL]], 0.000000e+00
169; CHECK-NEXT:    [[SMALL_JUMP:%.*]] = add i32 [[PTR_PREV_OFF]], 4
170; CHECK-NEXT:    [[LARGE_JUMP:%.*]] = add i32 [[PTR_PREV_OFF]], 8
171; CHECK-NEXT:    [[PTR_OFF]] = select i1 [[SKIP_NEXT]], i32 [[LARGE_JUMP]], i32 [[SMALL_JUMP]]
172; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 1
173; CHECK-NEXT:    [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
174; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
175; CHECK:       exit:
176; CHECK-NEXT:    ret float [[SUM]]
177;
178entry:
179  %start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
180  br label %loop
181loop:
182  %sum.prev = phi float [ %sum, %loop ], [ 0.0, %entry ]
183  %ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ]
184  %i = phi i32 [ %i.next, %loop ], [ 0, %entry ]
185
186  %val = load float, ptr addrspace(7) %ptr.prev
187  %sum = fadd float %sum.prev, %val
188
  ; A negative value advances by two floats, anything else by one; both
  ; candidates are derived from %ptr.prev.
189  %skip.next = fcmp olt float %val, 0.0
190  %small.jump = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1
191  %large.jump = getelementptr float, ptr addrspace(7) %ptr.prev, i32 2
192  %ptr = select i1 %skip.next, ptr addrspace(7) %large.jump, ptr addrspace(7) %small.jump
193
194  %i.next = add i32 %i, 1
195  %test = icmp ult i32 %i.next, %len
196  br i1 %test, label %loop, label %exit
197exit:
198  ret float %sum
199}
200
;; Control-flow variant of @sum_jump_on_negative: the two candidate pointers
;; are computed in separate blocks and merged by a phi in %loop.exit instead of
;; a select. The expected output merges two i32 offsets with a phi and loads
;; through %buf.
201define float @sum_jump_on_negative_with_phi(ptr addrspace(8) %buf, i32 %len) {
202; CHECK-LABEL: define float @sum_jump_on_negative_with_phi
203; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
204; CHECK-NEXT:  entry:
205; CHECK-NEXT:    br label [[LOOP:%.*]]
206; CHECK:       loop:
207; CHECK-NEXT:    [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP_EXIT:%.*]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
208; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP_EXIT]] ], [ 0, [[ENTRY]] ]
209; CHECK-NEXT:    [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP_EXIT]] ], [ 0, [[ENTRY]] ]
210; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
211; CHECK-NEXT:    [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
212; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 1
213; CHECK-NEXT:    [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
214; CHECK-NEXT:    [[SKIP_NEXT:%.*]] = fcmp olt float [[VAL]], 0.000000e+00
215; CHECK-NEXT:    br i1 [[SKIP_NEXT]], label [[THEN:%.*]], label [[ELSE:%.*]]
216; CHECK:       then:
217; CHECK-NEXT:    [[LARGE_JUMP:%.*]] = add i32 [[PTR_PREV_OFF]], 8
218; CHECK-NEXT:    br label [[LOOP_EXIT]]
219; CHECK:       else:
220; CHECK-NEXT:    [[SMALL_JUMP:%.*]] = add i32 [[PTR_PREV_OFF]], 4
221; CHECK-NEXT:    br label [[LOOP_EXIT]]
222; CHECK:       loop.exit:
223; CHECK-NEXT:    [[PTR_OFF]] = phi i32 [ [[LARGE_JUMP]], [[THEN]] ], [ [[SMALL_JUMP]], [[ELSE]] ]
224; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
225; CHECK:       exit:
226; CHECK-NEXT:    ret float [[SUM]]
227;
228entry:
229  %start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
230  br label %loop
231loop:
232  %sum.prev = phi float [ %sum, %loop.exit ], [ 0.0, %entry ]
233  %ptr.prev = phi ptr addrspace(7) [ %ptr, %loop.exit ], [ %start, %entry ]
234  %i = phi i32 [ %i.next, %loop.exit ], [ 0, %entry ]
235
236  %val = load float, ptr addrspace(7) %ptr.prev
237  %sum = fadd float %sum.prev, %val
238
  ; The loop-exit condition is computed here but only consumed in %loop.exit.
239  %i.next = add i32 %i, 1
240  %test = icmp ult i32 %i.next, %len
241
242  %skip.next = fcmp olt float %val, 0.0
243  br i1 %skip.next, label %then, label %else
244then:
245  %large.jump = getelementptr float, ptr addrspace(7) %ptr.prev, i32 2
246  br label %loop.exit
247else:
248  %small.jump = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1
249  br label %loop.exit
250loop.exit:
  ; Both incoming pointers are derived from %ptr.prev.
251  %ptr = phi ptr addrspace(7) [ %large.jump, %then ], [ %small.jump, %else ]
252  br i1 %test, label %loop, label %exit
253exit:
254  ret float %sum
255}
256
257;; But this has a shifting resource part.
;; The next pointer is a select between a getelementptr of the loop-carried
;; pointer and a fat pointer cast from a second buffer argument. The expected
;; output carries a resource phi and an offset phi, and selects the resource
;; and the offset separately.
258define float @sum_new_buffer_on_negative(ptr addrspace(8) %buf1, ptr addrspace(8) %buf2, i32 %len) {
259; CHECK-LABEL: define float @sum_new_buffer_on_negative
260; CHECK-SAME: (ptr addrspace(8) [[BUF1:%.*]], ptr addrspace(8) [[BUF2:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
261; CHECK-NEXT:  entry:
262; CHECK-NEXT:    br label [[LOOP:%.*]]
263; CHECK:       loop:
264; CHECK-NEXT:    [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
265; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
266; CHECK-NEXT:    [[PTR_PREV_RSRC:%.*]] = phi ptr addrspace(8) [ [[PTR_RSRC:%.*]], [[LOOP]] ], [ [[BUF1]], [[ENTRY]] ]
267; CHECK-NEXT:    [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
268; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[PTR_PREV_RSRC]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
269; CHECK-NEXT:    [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
270; CHECK-NEXT:    [[HOP:%.*]] = fcmp olt float [[VAL]], 0.000000e+00
271; CHECK-NEXT:    [[THIS_NEXT:%.*]] = add i32 [[PTR_PREV_OFF]], 4
272; CHECK-NEXT:    [[PTR_RSRC]] = select i1 [[HOP]], ptr addrspace(8) [[PTR_PREV_RSRC]], ptr addrspace(8) [[BUF2]]
273; CHECK-NEXT:    [[PTR_OFF]] = select i1 [[HOP]], i32 [[THIS_NEXT]], i32 0
274; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 1
275; CHECK-NEXT:    [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
276; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
277; CHECK:       exit:
278; CHECK-NEXT:    ret float [[SUM]]
279;
280entry:
  ; Two independent fat pointers, one per buffer argument.
281  %start = addrspacecast ptr addrspace(8) %buf1 to ptr addrspace(7)
282  %start2 = addrspacecast ptr addrspace(8) %buf2 to ptr addrspace(7)
283  br label %loop
284loop:
285  %sum.prev = phi float [ %sum, %loop ], [ 0.0, %entry ]
286  %ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ]
287  %i = phi i32 [ %i.next, %loop ], [ 0, %entry ]
288
289  %val = load float, ptr addrspace(7) %ptr.prev
290  %sum = fadd float %sum.prev, %val
291
  ; NOTE(review): as written, a negative %val keeps walking the current
  ; buffer and any other value switches to %start2, which reads as the
  ; opposite of the function name. The lowering under test does not depend
  ; on the polarity; confirm before changing.
292  %hop = fcmp olt float %val, 0.0
293  %this.next = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1
294  %ptr = select i1 %hop, ptr addrspace(7) %this.next, ptr addrspace(7) %start2
295
296  %i.next = add i32 %i, 1
297  %test = icmp ult i32 %i.next, %len
298  br i1 %test, label %loop, label %exit
299exit:
300  ret float %sum
301}
302
303;; As does this.
;; Control-flow variant of @sum_new_buffer_on_negative: the choice between the
;; advanced pointer and the second buffer's fat pointer is made by a phi in
;; %loop.exit. The expected output has a resource phi and an offset phi both
;; in %loop and in %loop.exit.
304define float @sum_new_buffer_on_negative_with_phi(ptr addrspace(8) %buf1, ptr addrspace(8) %buf2, i32 %len) {
305; CHECK-LABEL: define float @sum_new_buffer_on_negative_with_phi
306; CHECK-SAME: (ptr addrspace(8) [[BUF1:%.*]], ptr addrspace(8) [[BUF2:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
307; CHECK-NEXT:  entry:
308; CHECK-NEXT:    br label [[LOOP:%.*]]
309; CHECK:       loop:
310; CHECK-NEXT:    [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP_EXIT:%.*]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
311; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP_EXIT]] ], [ 0, [[ENTRY]] ]
312; CHECK-NEXT:    [[PTR_PREV_RSRC:%.*]] = phi ptr addrspace(8) [ [[PTR_RSRC:%.*]], [[LOOP_EXIT]] ], [ [[BUF1]], [[ENTRY]] ]
313; CHECK-NEXT:    [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP_EXIT]] ], [ 0, [[ENTRY]] ]
314; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[PTR_PREV_RSRC]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
315; CHECK-NEXT:    [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
316; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 1
317; CHECK-NEXT:    [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
318; CHECK-NEXT:    [[HOP:%.*]] = fcmp olt float [[VAL]], 0.000000e+00
319; CHECK-NEXT:    br i1 [[HOP]], label [[THEN:%.*]], label [[LOOP_EXIT]]
320; CHECK:       then:
321; CHECK-NEXT:    [[THIS_NEXT:%.*]] = add i32 [[PTR_PREV_OFF]], 4
322; CHECK-NEXT:    br label [[LOOP_EXIT]]
323; CHECK:       loop.exit:
324; CHECK-NEXT:    [[PTR_RSRC]] = phi ptr addrspace(8) [ [[PTR_PREV_RSRC]], [[THEN]] ], [ [[BUF2]], [[LOOP]] ]
325; CHECK-NEXT:    [[PTR_OFF]] = phi i32 [ [[THIS_NEXT]], [[THEN]] ], [ 0, [[LOOP]] ]
326; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
327; CHECK:       exit:
328; CHECK-NEXT:    ret float [[SUM]]
329;
330entry:
331  %start = addrspacecast ptr addrspace(8) %buf1 to ptr addrspace(7)
332  %start2 = addrspacecast ptr addrspace(8) %buf2 to ptr addrspace(7)
333  br label %loop
334loop:
335  %sum.prev = phi float [ %sum, %loop.exit ], [ 0.0, %entry ]
336  %ptr.prev = phi ptr addrspace(7) [ %ptr, %loop.exit ], [ %start, %entry ]
337  %i = phi i32 [ %i.next, %loop.exit ], [ 0, %entry ]
338
339  %val = load float, ptr addrspace(7) %ptr.prev
340  %sum = fadd float %sum.prev, %val
341
342  %i.next = add i32 %i, 1
343  %test = icmp ult i32 %i.next, %len
  ; NOTE(review): a negative %val goes through %then and keeps walking the
  ; current buffer; any other value falls straight to %loop.exit and picks
  ; %start2, which reads as the opposite of the function name. Confirm
  ; before changing.
344  %hop = fcmp olt float %val, 0.0
345  br i1 %hop, label %then, label %loop.exit
346then:
347  %this.next = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1
348  br label %loop.exit
349loop.exit:
  ; Incoming pointers come from two different buffers here.
350  %ptr = phi ptr addrspace(7) [ %this.next, %then ], [ %start2, %loop ]
351  br i1 %test, label %loop, label %exit
352exit:
353  ret float %sum
354}
355
356;; Test that the uniform buffer descriptor optimization works correctly for phi
357;; nodes that repeat the same predecessor multiple times.
;; Same loop as @sum, but the back edge is a switch whose default destination
;; and `i32 1` case both target %loop, so every phi in %loop lists %loop twice.
;; The expected output has a single i32 offset phi that keeps the duplicated
;; incoming entries, and loads through %buf.
358define float @sum_duplicate_preds(ptr addrspace(8) %buf, i32 %len) {
359; CHECK-LABEL: define float @sum_duplicate_preds
360; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
361; CHECK-NEXT:  entry:
362; CHECK-NEXT:    br label [[LOOP:%.*]]
363; CHECK:       loop:
364; CHECK-NEXT:    [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ [[SUM]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
365; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ], [ [[I_NEXT]], [[LOOP]] ]
366; CHECK-NEXT:    [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ], [ [[PTR]], [[LOOP]] ]
367; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
368; CHECK-NEXT:    [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
369; CHECK-NEXT:    [[PTR]] = add i32 [[PTR_PREV_OFF]], 4
370; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 1
371; CHECK-NEXT:    [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
372; CHECK-NEXT:    [[TEST_EXT:%.*]] = zext i1 [[TEST]] to i32
373; CHECK-NEXT:    switch i32 [[TEST_EXT]], label [[LOOP]] [
374; CHECK-NEXT:      i32 1, label [[LOOP]]
375; CHECK-NEXT:      i32 0, label [[EXIT:%.*]]
376; CHECK-NEXT:    ]
377; CHECK:       exit:
378; CHECK-NEXT:    ret float [[SUM]]
379;
380entry:
381  %start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
382  br label %loop
383loop:
  ; Each phi has three incoming entries: %entry once and %loop twice. The
  ; position of the %entry entry differs between the phis.
384  %sum.prev = phi float [ %sum, %loop ], [ %sum, %loop ], [ 0.0, %entry ]
385  %ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ], [ %ptr, %loop ]
386  %i = phi i32 [ %i.next, %loop ], [ 0, %entry ], [ %i.next, %loop ]
387
388  %val = load float, ptr addrspace(7) %ptr.prev
389  %sum = fadd float %sum.prev, %val
390
391  %ptr = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1
392  %i.next = add i32 %i, 1
393  %test = icmp ult i32 %i.next, %len
  ; Widen the i1 so a switch can be used; the default and the `i32 1` case
  ; both branch back to %loop, which is what duplicates the predecessor.
394  %test.ext = zext i1 %test to i32
395  switch i32 %test.ext, label %loop [
396  i32 1, label %loop
397  i32 0, label %exit
398  ]
399exit:
400  ret float %sum
401}
402
403;; And similarly check the "might not be uniform" case.
;; Combines @sum_integer_ops with the duplicated-predecessor switch: the
;; pointer is advanced with ptrtoint / add i160 / inttoptr, and %loop is
;; listed twice in every phi. The expected output carries a resource phi and
;; an offset phi, each keeping the duplicated incoming entries.
404define float @sum_integer_ops_duplicate_preds(ptr addrspace(8) %buf, i32 %len) {
405; CHECK-LABEL: define float @sum_integer_ops_duplicate_preds
406; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
407; CHECK-NEXT:  entry:
408; CHECK-NEXT:    br label [[LOOP:%.*]]
409; CHECK:       loop:
410; CHECK-NEXT:    [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ [[SUM]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
411; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ], [ [[I_NEXT]], [[LOOP]] ]
412; CHECK-NEXT:    [[PTR_PREV_RSRC:%.*]] = phi ptr addrspace(8) [ [[PTR_RSRC:%.*]], [[LOOP]] ], [ [[BUF]], [[ENTRY]] ], [ [[PTR_RSRC]], [[LOOP]] ]
413; CHECK-NEXT:    [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ], [ [[PTR_OFF]], [[LOOP]] ]
414; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[PTR_PREV_RSRC]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
415; CHECK-NEXT:    [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
416; CHECK-NEXT:    [[PTR_PREV_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[PTR_PREV_RSRC]] to i160
417; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw i160 [[PTR_PREV_INT_RSRC]], 32
418; CHECK-NEXT:    [[PTR_PREV_INT_OFF:%.*]] = zext i32 [[PTR_PREV_OFF]] to i160
419; CHECK-NEXT:    [[PTR_PREV_INT:%.*]] = or i160 [[TMP0]], [[PTR_PREV_INT_OFF]]
420; CHECK-NEXT:    [[PTR_INT:%.*]] = add i160 [[PTR_PREV_INT]], 4
421; CHECK-NEXT:    [[TMP1:%.*]] = lshr i160 [[PTR_INT]], 32
422; CHECK-NEXT:    [[TMP2:%.*]] = trunc i160 [[TMP1]] to i128
423; CHECK-NEXT:    [[PTR_RSRC]] = inttoptr i128 [[TMP2]] to ptr addrspace(8)
424; CHECK-NEXT:    [[PTR_OFF]] = trunc i160 [[PTR_INT]] to i32
425; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 1
426; CHECK-NEXT:    [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
427; CHECK-NEXT:    [[TEST_EXT:%.*]] = zext i1 [[TEST]] to i32
428; CHECK-NEXT:    switch i32 [[TEST_EXT]], label [[LOOP]] [
429; CHECK-NEXT:      i32 1, label [[LOOP]]
430; CHECK-NEXT:      i32 0, label [[EXIT:%.*]]
431; CHECK-NEXT:    ]
432; CHECK:       exit:
433; CHECK-NEXT:    ret float [[SUM]]
434;
435entry:
436  %start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
437  br label %loop
438loop:
  ; Each phi has three incoming entries: %entry once and %loop twice.
439  %sum.prev = phi float [ %sum, %loop ], [ %sum, %loop ], [ 0.0, %entry ]
440  %ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ], [ %ptr, %loop ]
441  %i = phi i32 [ %i.next, %loop ], [ 0, %entry ], [ %i.next, %loop ]
442
443  %val = load float, ptr addrspace(7) %ptr.prev
444  %sum = fadd float %sum.prev, %val
445
  ; Integer round trip on the whole 160-bit fat pointer, as in
  ; @sum_integer_ops.
446  %ptr.prev.int = ptrtoint ptr addrspace(7) %ptr.prev to i160
447  %ptr.int = add i160 %ptr.prev.int, 4
448  %ptr = inttoptr i160 %ptr.int to ptr addrspace(7)
449  %i.next = add i32 %i, 1
450  %test = icmp ult i32 %i.next, %len
  ; The default and the `i32 1` case both branch back to %loop.
451  %test.ext = zext i1 %test to i32
452  switch i32 %test.ext, label %loop [
453  i32 1, label %loop
454  i32 0, label %exit
455  ]
456exit:
457  ret float %sum
458}
459