; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -S -mcpu=gfx900 -amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s
; RUN: opt -S -mcpu=gfx900 -passes=amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s

;; Each function below carries a ptr addrspace(7) value around a loop (through
;; phi and/or select) and loads a float through it on every iteration. The
;; autogenerated assertions record, per function, whether the lowered loop still
;; carries a ptr addrspace(8) resource alongside the i32 offset, or only the
;; offset with the original buffer argument fed straight to the buffer load.

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
target triple = "amdgcn--"

;; This should optimize to just the offset part
;; (the only pointer update is a GEP, so the expected output has a single i32
;; offset phi and every load uses %buf directly).
define float @sum(ptr addrspace(8) %buf, i32 %len) {
; CHECK-LABEL: define float @sum
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
; CHECK-NEXT:    [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
; CHECK-NEXT:    [[PTR]] = add i32 [[PTR_PREV_OFF]], 4
; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 1
; CHECK-NEXT:    [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret float [[SUM]]
;
entry:
  %start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  br label %loop
loop:
  %sum.prev = phi float [ %sum, %loop ], [ 0.0, %entry ]
  %ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ]
  %i = phi i32 [ %i.next, %loop ], [ 0, %entry ]

  %val = load float, ptr addrspace(7) %ptr.prev
  %sum = fadd float %sum.prev, %val

  ;; Step to the next float with a GEP; lowered to `add i32 <offset>, 4`.
  %ptr = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1
  %i.next = add i32 %i, 1
  %test = icmp ult i32 %i.next, %len
  br i1 %test, label %loop, label %exit
exit:
  ret float %sum
}

;; But this should not
;; (the pointer is advanced through ptrtoint / add i160 / inttoptr, so the
;; expected output keeps both a resource phi and an offset phi and rebuilds the
;; two parts from the i160 on every iteration).
define float @sum_integer_ops(ptr addrspace(8) %buf, i32 %len) {
; CHECK-LABEL: define float @sum_integer_ops
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[PTR_PREV_RSRC:%.*]] = phi ptr addrspace(8) [ [[PTR_RSRC:%.*]], [[LOOP]] ], [ [[BUF]], [[ENTRY]] ]
; CHECK-NEXT:    [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[PTR_PREV_RSRC]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
; CHECK-NEXT:    [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
; CHECK-NEXT:    [[PTR_PREV_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[PTR_PREV_RSRC]] to i160
; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw i160 [[PTR_PREV_INT_RSRC]], 32
; CHECK-NEXT:    [[PTR_PREV_INT_OFF:%.*]] = zext i32 [[PTR_PREV_OFF]] to i160
; CHECK-NEXT:    [[PTR_PREV_INT:%.*]] = or i160 [[TMP0]], [[PTR_PREV_INT_OFF]]
; CHECK-NEXT:    [[PTR_INT:%.*]] = add i160 [[PTR_PREV_INT]], 4
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i160 [[PTR_INT]], 32
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i160 [[TMP1]] to i128
; CHECK-NEXT:    [[PTR_RSRC]] = inttoptr i128 [[TMP2]] to ptr addrspace(8)
; CHECK-NEXT:    [[PTR_OFF]] = trunc i160 [[PTR_INT]] to i32
; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 1
; CHECK-NEXT:    [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret float [[SUM]]
;
entry:
  %start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  br label %loop
loop:
  %sum.prev = phi float [ %sum, %loop ], [ 0.0, %entry ]
  %ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ]
  %i = phi i32 [ %i.next, %loop ], [ 0, %entry ]

  %val = load float, ptr addrspace(7) %ptr.prev
  %sum = fadd float %sum.prev, %val

  ;; Advance by 4 bytes using integer arithmetic on the full 160-bit pointer.
  %ptr.prev.int = ptrtoint ptr addrspace(7) %ptr.prev to i160
  %ptr.int = add i160 %ptr.prev.int, 4
  %ptr = inttoptr i160 %ptr.int to ptr addrspace(7)
  %i.next = add i32 %i, 1
  %test = icmp ult i32 %i.next, %len
  br i1 %test, label %loop, label %exit
exit:
  ret float %sum
}

;; Should go to offsets only
;; (two nested loops whose pointer phis feed each other; both are GEP-only, so
;; the expected output has one i32 offset phi per loop and loads from %buf).
define float @sum_2d(ptr addrspace(8) %buf, i32 %ii, i32 %jj) {
; CHECK-LABEL: define float @sum_2d
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[II:%.*]], i32 [[JJ:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP1_ENTRY:%.*]]
; CHECK:       loop1.entry:
; CHECK-NEXT:    [[SUM1_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP1_EXIT:%.*]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP1_EXIT]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[PTR1_PREV_OFF:%.*]] = phi i32 [ [[PTR1:%.*]], [[LOOP1_EXIT]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    br label [[LOOP2:%.*]]
; CHECK:       loop2:
; CHECK-NEXT:    [[SUM2_PREV:%.*]] = phi float [ [[SUM]], [[LOOP2]] ], [ [[SUM1_PREV]], [[LOOP1_ENTRY]] ]
; CHECK-NEXT:    [[J:%.*]] = phi i32 [ [[J_NEXT:%.*]], [[LOOP2]] ], [ 0, [[LOOP1_ENTRY]] ]
; CHECK-NEXT:    [[PTR2_PREV_OFF:%.*]] = phi i32 [ [[PTR2:%.*]], [[LOOP2]] ], [ [[PTR1_PREV_OFF]], [[LOOP1_ENTRY]] ]
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR2_PREV_OFF]], i32 0, i32 0)
; CHECK-NEXT:    [[SUM]] = fadd float [[SUM2_PREV]], [[VAL]]
; CHECK-NEXT:    [[PTR2]] = add i32 [[PTR2_PREV_OFF]], 4
; CHECK-NEXT:    [[J_NEXT]] = add i32 [[J]], 1
; CHECK-NEXT:    [[TEST2:%.*]] = icmp ult i32 [[J_NEXT]], [[JJ]]
; CHECK-NEXT:    br i1 [[TEST2]], label [[LOOP2]], label [[LOOP1_EXIT]]
; CHECK:       loop1.exit:
; CHECK-NEXT:    [[PTR1]] = add i32 [[PTR2]], 4
; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 1
; CHECK-NEXT:    [[TEST1:%.*]] = icmp ult i32 [[I_NEXT]], [[II]]
; CHECK-NEXT:    br i1 [[TEST1]], label [[LOOP1_ENTRY]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret float [[SUM]]
;
entry:
  %start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  br label %loop1.entry
loop1.entry:
  %sum1.prev = phi float [ %sum, %loop1.exit ], [ 0.0, %entry ]
  %ptr1.prev = phi ptr addrspace(7) [ %ptr1, %loop1.exit ], [ %start, %entry ]
  %i = phi i32 [ %i.next, %loop1.exit ], [ 0, %entry ]

  br label %loop2
loop2:
  %sum2.prev = phi float [ %sum, %loop2 ], [ %sum1.prev, %loop1.entry ]
  ;; Inner pointer phi starts from the outer loop's pointer phi.
  %ptr2.prev = phi ptr addrspace(7) [ %ptr2, %loop2 ], [ %ptr1.prev, %loop1.entry ]
  %j = phi i32 [ %j.next, %loop2 ], [ 0, %loop1.entry ]

  %val = load float, ptr addrspace(7) %ptr2.prev
  %sum = fadd float %sum2.prev, %val

  %ptr2 = getelementptr float, ptr addrspace(7) %ptr2.prev, i32 1
  %j.next = add i32 %j, 1
  %test2 = icmp ult i32 %j.next, %jj

  br i1 %test2, label %loop2, label %loop1.exit
loop1.exit:
  ;; Outer step is derived from the inner loop's final pointer.
  %ptr1 = getelementptr float, ptr addrspace(7) %ptr2, i32 1
  %i.next = add i32 %i, 1
  %test1 = icmp ult i32 %i.next, %ii
  br i1 %test1, label %loop1.entry, label %exit
exit:
  ret float %sum
}

;; This should optimize to just the offset parts since all the arguments to the
;; select point to the same buffer.
define float @sum_jump_on_negative(ptr addrspace(8) %buf, i32 %len) {
; CHECK-LABEL: define float @sum_jump_on_negative
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
; CHECK-NEXT:    [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
; CHECK-NEXT:    [[SKIP_NEXT:%.*]] = fcmp olt float [[VAL]], 0.000000e+00
; CHECK-NEXT:    [[SMALL_JUMP:%.*]] = add i32 [[PTR_PREV_OFF]], 4
; CHECK-NEXT:    [[LARGE_JUMP:%.*]] = add i32 [[PTR_PREV_OFF]], 8
; CHECK-NEXT:    [[PTR_OFF]] = select i1 [[SKIP_NEXT]], i32 [[LARGE_JUMP]], i32 [[SMALL_JUMP]]
; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 1
; CHECK-NEXT:    [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret float [[SUM]]
;
entry:
  %start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  br label %loop
loop:
  %sum.prev = phi float [ %sum, %loop ], [ 0.0, %entry ]
  %ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ]
  %i = phi i32 [ %i.next, %loop ], [ 0, %entry ]

  %val = load float, ptr addrspace(7) %ptr.prev
  %sum = fadd float %sum.prev, %val

  ;; Skip one extra element after a negative value. Both select arms are GEPs
  ;; off %ptr.prev, so the select is lowered to a select on i32 offsets only.
  %skip.next = fcmp olt float %val, 0.0
  %small.jump = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1
  %large.jump = getelementptr float, ptr addrspace(7) %ptr.prev, i32 2
  %ptr = select i1 %skip.next, ptr addrspace(7) %large.jump, ptr addrspace(7) %small.jump

  %i.next = add i32 %i, 1
  %test = icmp ult i32 %i.next, %len
  br i1 %test, label %loop, label %exit
exit:
  ret float %sum
}

;; Same walk as @sum_jump_on_negative, but the two candidate pointers are
;; computed in separate blocks and merged with a phi instead of a select. The
;; expected output still carries only i32 offsets and loads from %buf.
define float @sum_jump_on_negative_with_phi(ptr addrspace(8) %buf, i32 %len) {
; CHECK-LABEL: define float @sum_jump_on_negative_with_phi
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP_EXIT:%.*]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP_EXIT]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP_EXIT]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
; CHECK-NEXT:    [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 1
; CHECK-NEXT:    [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
; CHECK-NEXT:    [[SKIP_NEXT:%.*]] = fcmp olt float [[VAL]], 0.000000e+00
; CHECK-NEXT:    br i1 [[SKIP_NEXT]], label [[THEN:%.*]], label [[ELSE:%.*]]
; CHECK:       then:
; CHECK-NEXT:    [[LARGE_JUMP:%.*]] = add i32 [[PTR_PREV_OFF]], 8
; CHECK-NEXT:    br label [[LOOP_EXIT]]
; CHECK:       else:
; CHECK-NEXT:    [[SMALL_JUMP:%.*]] = add i32 [[PTR_PREV_OFF]], 4
; CHECK-NEXT:    br label [[LOOP_EXIT]]
; CHECK:       loop.exit:
; CHECK-NEXT:    [[PTR_OFF]] = phi i32 [ [[LARGE_JUMP]], [[THEN]] ], [ [[SMALL_JUMP]], [[ELSE]] ]
; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret float [[SUM]]
;
entry:
  %start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  br label %loop
loop:
  %sum.prev = phi float [ %sum, %loop.exit ], [ 0.0, %entry ]
  %ptr.prev = phi ptr addrspace(7) [ %ptr, %loop.exit ], [ %start, %entry ]
  %i = phi i32 [ %i.next, %loop.exit ], [ 0, %entry ]

  %val = load float, ptr addrspace(7) %ptr.prev
  %sum = fadd float %sum.prev, %val

  %i.next = add i32 %i, 1
  %test = icmp ult i32 %i.next, %len

  %skip.next = fcmp olt float %val, 0.0
  br i1 %skip.next, label %then, label %else
then:
  %large.jump = getelementptr float, ptr addrspace(7) %ptr.prev, i32 2
  br label %loop.exit
else:
  %small.jump = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1
  br label %loop.exit
loop.exit:
  ;; Both incoming pointers are GEPs off %ptr.prev.
  %ptr = phi ptr addrspace(7) [ %large.jump, %then ], [ %small.jump, %else ]
  br i1 %test, label %loop, label %exit
exit:
  ret float %sum
}

;; But this has a shifting resource part.
;; (one select arm comes from %buf2, so the expected output keeps a resource
;; phi and emits two selects: one on the resource, one on the offset).
define float @sum_new_buffer_on_negative(ptr addrspace(8) %buf1, ptr addrspace(8) %buf2, i32 %len) {
; CHECK-LABEL: define float @sum_new_buffer_on_negative
; CHECK-SAME: (ptr addrspace(8) [[BUF1:%.*]], ptr addrspace(8) [[BUF2:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[PTR_PREV_RSRC:%.*]] = phi ptr addrspace(8) [ [[PTR_RSRC:%.*]], [[LOOP]] ], [ [[BUF1]], [[ENTRY]] ]
; CHECK-NEXT:    [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[PTR_PREV_RSRC]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
; CHECK-NEXT:    [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
; CHECK-NEXT:    [[HOP:%.*]] = fcmp olt float [[VAL]], 0.000000e+00
; CHECK-NEXT:    [[THIS_NEXT:%.*]] = add i32 [[PTR_PREV_OFF]], 4
; CHECK-NEXT:    [[PTR_RSRC]] = select i1 [[HOP]], ptr addrspace(8) [[PTR_PREV_RSRC]], ptr addrspace(8) [[BUF2]]
; CHECK-NEXT:    [[PTR_OFF]] = select i1 [[HOP]], i32 [[THIS_NEXT]], i32 0
; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 1
; CHECK-NEXT:    [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret float [[SUM]]
;
entry:
  %start = addrspacecast ptr addrspace(8) %buf1 to ptr addrspace(7)
  %start2 = addrspacecast ptr addrspace(8) %buf2 to ptr addrspace(7)
  br label %loop
loop:
  %sum.prev = phi float [ %sum, %loop ], [ 0.0, %entry ]
  %ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ]
  %i = phi i32 [ %i.next, %loop ], [ 0, %entry ]

  %val = load float, ptr addrspace(7) %ptr.prev
  %sum = fadd float %sum.prev, %val

  ;; As written, a true %hop keeps stepping through the current buffer and a
  ;; false %hop restarts at the beginning of %buf2.
  %hop = fcmp olt float %val, 0.0
  %this.next = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1
  %ptr = select i1 %hop, ptr addrspace(7) %this.next, ptr addrspace(7) %start2

  %i.next = add i32 %i, 1
  %test = icmp ult i32 %i.next, %len
  br i1 %test, label %loop, label %exit
exit:
  ret float %sum
}

;; As does this.
define float @sum_new_buffer_on_negative_with_phi(ptr addrspace(8) %buf1, ptr addrspace(8) %buf2, i32 %len) {
; CHECK-LABEL: define float @sum_new_buffer_on_negative_with_phi
; CHECK-SAME: (ptr addrspace(8) [[BUF1:%.*]], ptr addrspace(8) [[BUF2:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP_EXIT:%.*]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP_EXIT]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[PTR_PREV_RSRC:%.*]] = phi ptr addrspace(8) [ [[PTR_RSRC:%.*]], [[LOOP_EXIT]] ], [ [[BUF1]], [[ENTRY]] ]
; CHECK-NEXT:    [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP_EXIT]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[PTR_PREV_RSRC]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
; CHECK-NEXT:    [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 1
; CHECK-NEXT:    [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
; CHECK-NEXT:    [[HOP:%.*]] = fcmp olt float [[VAL]], 0.000000e+00
; CHECK-NEXT:    br i1 [[HOP]], label [[THEN:%.*]], label [[LOOP_EXIT]]
; CHECK:       then:
; CHECK-NEXT:    [[THIS_NEXT:%.*]] = add i32 [[PTR_PREV_OFF]], 4
; CHECK-NEXT:    br label [[LOOP_EXIT]]
; CHECK:       loop.exit:
; CHECK-NEXT:    [[PTR_RSRC]] = phi ptr addrspace(8) [ [[PTR_PREV_RSRC]], [[THEN]] ], [ [[BUF2]], [[LOOP]] ]
; CHECK-NEXT:    [[PTR_OFF]] = phi i32 [ [[THIS_NEXT]], [[THEN]] ], [ 0, [[LOOP]] ]
; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret float [[SUM]]
;
entry:
  %start = addrspacecast ptr addrspace(8) %buf1 to ptr addrspace(7)
  %start2 = addrspacecast ptr addrspace(8) %buf2 to ptr addrspace(7)
  br label %loop
loop:
  %sum.prev = phi float [ %sum, %loop.exit ], [ 0.0, %entry ]
  %ptr.prev = phi ptr addrspace(7) [ %ptr, %loop.exit ], [ %start, %entry ]
  %i = phi i32 [ %i.next, %loop.exit ], [ 0, %entry ]

  %val = load float, ptr addrspace(7) %ptr.prev
  %sum = fadd float %sum.prev, %val

  %i.next = add i32 %i, 1
  %test = icmp ult i32 %i.next, %len
  %hop = fcmp olt float %val, 0.0
  br i1 %hop, label %then, label %loop.exit
then:
  %this.next = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1
  br label %loop.exit
loop.exit:
  ;; The edge straight from %loop brings in %start2 (derived from %buf2), so
  ;; this phi is lowered to a resource phi plus an offset phi.
  %ptr = phi ptr addrspace(7) [ %this.next, %then ], [ %start2, %loop ]
  br i1 %test, label %loop, label %exit
exit:
  ret float %sum
}

;; Test that the uniform buffer descriptor optimization works correctly for phi
;; nodes that repeat the same predecessor multiple times.
;; (%loop is listed twice in every phi because the switch below reaches it both
;; as its default destination and through the `i32 1` case).
define float @sum_duplicate_preds(ptr addrspace(8) %buf, i32 %len) {
; CHECK-LABEL: define float @sum_duplicate_preds
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ [[SUM]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ], [ [[I_NEXT]], [[LOOP]] ]
; CHECK-NEXT:    [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ], [ [[PTR]], [[LOOP]] ]
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
; CHECK-NEXT:    [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
; CHECK-NEXT:    [[PTR]] = add i32 [[PTR_PREV_OFF]], 4
; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 1
; CHECK-NEXT:    [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
; CHECK-NEXT:    [[TEST_EXT:%.*]] = zext i1 [[TEST]] to i32
; CHECK-NEXT:    switch i32 [[TEST_EXT]], label [[LOOP]] [
; CHECK-NEXT:      i32 1, label [[LOOP]]
; CHECK-NEXT:      i32 0, label [[EXIT:%.*]]
; CHECK-NEXT:    ]
; CHECK:       exit:
; CHECK-NEXT:    ret float [[SUM]]
;
entry:
  %start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  br label %loop
loop:
  %sum.prev = phi float [ %sum, %loop ], [ %sum, %loop ], [ 0.0, %entry ]
  %ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ], [ %ptr, %loop ]
  %i = phi i32 [ %i.next, %loop ], [ 0, %entry ], [ %i.next, %loop ]

  %val = load float, ptr addrspace(7) %ptr.prev
  %sum = fadd float %sum.prev, %val

  %ptr = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1
  %i.next = add i32 %i, 1
  %test = icmp ult i32 %i.next, %len
  %test.ext = zext i1 %test to i32
  ;; Two edges back to %loop (default and case 1) create the duplicate
  ;; predecessor entries in the phis above.
  switch i32 %test.ext, label %loop [
    i32 1, label %loop
    i32 0, label %exit
  ]
exit:
  ret float %sum
}

;; And similarly check the "might not be uniform" case.
;; (integer pointer arithmetic as in @sum_integer_ops, so the expected output
;; keeps a resource phi, here also with %loop listed twice).
define float @sum_integer_ops_duplicate_preds(ptr addrspace(8) %buf, i32 %len) {
; CHECK-LABEL: define float @sum_integer_ops_duplicate_preds
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ [[SUM]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ], [ [[I_NEXT]], [[LOOP]] ]
; CHECK-NEXT:    [[PTR_PREV_RSRC:%.*]] = phi ptr addrspace(8) [ [[PTR_RSRC:%.*]], [[LOOP]] ], [ [[BUF]], [[ENTRY]] ], [ [[PTR_RSRC]], [[LOOP]] ]
; CHECK-NEXT:    [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ], [ [[PTR_OFF]], [[LOOP]] ]
; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[PTR_PREV_RSRC]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
; CHECK-NEXT:    [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
; CHECK-NEXT:    [[PTR_PREV_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[PTR_PREV_RSRC]] to i160
; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw i160 [[PTR_PREV_INT_RSRC]], 32
; CHECK-NEXT:    [[PTR_PREV_INT_OFF:%.*]] = zext i32 [[PTR_PREV_OFF]] to i160
; CHECK-NEXT:    [[PTR_PREV_INT:%.*]] = or i160 [[TMP0]], [[PTR_PREV_INT_OFF]]
; CHECK-NEXT:    [[PTR_INT:%.*]] = add i160 [[PTR_PREV_INT]], 4
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i160 [[PTR_INT]], 32
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i160 [[TMP1]] to i128
; CHECK-NEXT:    [[PTR_RSRC]] = inttoptr i128 [[TMP2]] to ptr addrspace(8)
; CHECK-NEXT:    [[PTR_OFF]] = trunc i160 [[PTR_INT]] to i32
; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 1
; CHECK-NEXT:    [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
; CHECK-NEXT:    [[TEST_EXT:%.*]] = zext i1 [[TEST]] to i32
; CHECK-NEXT:    switch i32 [[TEST_EXT]], label [[LOOP]] [
; CHECK-NEXT:      i32 1, label [[LOOP]]
; CHECK-NEXT:      i32 0, label [[EXIT:%.*]]
; CHECK-NEXT:    ]
; CHECK:       exit:
; CHECK-NEXT:    ret float [[SUM]]
;
entry:
  %start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  br label %loop
loop:
  %sum.prev = phi float [ %sum, %loop ], [ %sum, %loop ], [ 0.0, %entry ]
  %ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ], [ %ptr, %loop ]
  %i = phi i32 [ %i.next, %loop ], [ 0, %entry ], [ %i.next, %loop ]

  %val = load float, ptr addrspace(7) %ptr.prev
  %sum = fadd float %sum.prev, %val

  ;; Advance by 4 bytes using integer arithmetic on the full 160-bit pointer.
  %ptr.prev.int = ptrtoint ptr addrspace(7) %ptr.prev to i160
  %ptr.int = add i160 %ptr.prev.int, 4
  %ptr = inttoptr i160 %ptr.int to ptr addrspace(7)
  %i.next = add i32 %i, 1
  %test = icmp ult i32 %i.next, %len
  %test.ext = zext i1 %test to i32
  switch i32 %test.ext, label %loop [
    i32 1, label %loop
    i32 0, label %exit
  ]
exit:
  ret float %sum
}