; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -S -mcpu=gfx900 -amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s
; RUN: opt -S -mcpu=gfx900 -passes=amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s

; This test covers lowering of memory operations (load, store, atomicrmw,
; cmpxchg) on buffer fat pointers (addrspace(7)) into the corresponding
; llvm.amdgcn.raw.ptr.buffer.* / raw.ptr.atomic.buffer.* intrinsics that take
; the underlying buffer resource (addrspace(8)). The CHECK lines verify:
;  - volatile accesses set the high bit (i32 -2147483648) of the trailing aux
;    operand of the intrinsic call,
;  - !nontemporal and !invariant.load metadata are carried over to the calls,
;  - release/acquire fences are inserted around atomic accesses according to
;    their ordering and syncscope.

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
target triple = "amdgcn--"

; Loads: scalar and vector types, metadata propagation, volatility, and
; atomic orderings (seq_cst, monotonic, acquire).
define void @loads(ptr addrspace(8) %buf) {
; CHECK-LABEL: define void @loads
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    [[SCALAR:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    [[VEC2:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) align 8 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    [[VEC4:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    [[NONTEMPORAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0), !nontemporal [[META0:![0-9]+]]
; CHECK-NEXT:    [[INVARIANT:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0), !invariant.load [[META1:![0-9]+]]
; CHECK-NEXT:    [[NONTEMPORAL_INVARIANT:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0), !invariant.load [[META1]], !nontemporal [[META0]]
; CHECK-NEXT:    [[VOLATILE:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
; CHECK-NEXT:    [[VOLATILE_NONTEMPORAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648), !nontemporal [[META0]]
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[ATOMIC:%.*]] = call float @llvm.amdgcn.raw.ptr.atomic.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    [[ATOMIC_MONOTONIC:%.*]] = call float @llvm.amdgcn.raw.ptr.atomic.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    [[ATOMIC_ACQUIRE:%.*]] = call float @llvm.amdgcn.raw.ptr.atomic.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence acquire
; CHECK-NEXT:    ret void
;
  %base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %p = getelementptr float, ptr addrspace(7) %base, i32 4

  %scalar = load float, ptr addrspace(7) %p, align 4
  %vec2 = load <2 x float>, ptr addrspace(7) %p, align 8
  %vec4 = load <4 x float>, ptr addrspace(7) %p, align 16

  %nontemporal = load float, ptr addrspace(7) %p, !nontemporal !0
  %invariant = load float, ptr addrspace(7) %p, !invariant.load !1
  %nontemporal.invariant = load float, ptr addrspace(7) %p, !nontemporal !0, !invariant.load !1

  %volatile = load volatile float, ptr addrspace(7) %p
  %volatile.nontemporal = load volatile float, ptr addrspace(7) %p, !nontemporal !0

  %atomic = load atomic volatile float, ptr addrspace(7) %p syncscope("wavefront") seq_cst, align 4
  %atomic.monotonic = load atomic float, ptr addrspace(7) %p syncscope("wavefront") monotonic, align 4
  %atomic.acquire = load atomic float, ptr addrspace(7) %p acquire, align 4

  ret void
}

; Stores: scalar/vector, metadata, volatility, and atomic orderings
; (seq_cst, monotonic, release).
define void @stores(ptr addrspace(8) %buf, float %f, <4 x float> %f4) {
; CHECK-LABEL: define void @stores
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], float [[F:%.*]], <4 x float> [[F4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> [[F4]], ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0), !nontemporal [[META0]]
; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648), !nontemporal [[META0]]
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence release
; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    ret void
;
  %base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %p = getelementptr float, ptr addrspace(7) %base, i32 4

  store float %f, ptr addrspace(7) %p, align 4
  store <4 x float> %f4, ptr addrspace(7) %p, align 16

  store float %f, ptr addrspace(7) %p, !nontemporal !0

  store volatile float %f, ptr addrspace(7) %p
  store volatile float %f, ptr addrspace(7) %p, !nontemporal !0

  store atomic volatile float %f, ptr addrspace(7) %p syncscope("wavefront") seq_cst, align 4
  store atomic float %f, ptr addrspace(7) %p syncscope("wavefront") monotonic, align 4
  store atomic float %f, ptr addrspace(7) %p release, align 4

  ret void
}

; Read-modify-write atomics: every integer and FP atomicrmw operation maps to
; its raw.ptr.buffer.atomic.* intrinsic, each seq_cst op bracketed by
; release/acquire fences in the wavefront syncscope.
define void @atomicrmw(ptr addrspace(8) %buf, float %f, i32 %i) {
; CHECK-LABEL: define void @atomicrmw
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], float [[F:%.*]], i32 [[I:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[XCHG:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[ADD:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[SUB:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.sub.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[AND:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.and.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[OR:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.or.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[XOR:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.xor.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[MIN:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smin.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[MAX:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smax.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umin.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[UMAX:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umax.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[FADD:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[FMAX:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[FMIN:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    ret void
;
  %base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %p = getelementptr float, ptr addrspace(7) %base, i32 4

  ; Fence insertion is tested by loads and stores
  %xchg = atomicrmw xchg ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %add = atomicrmw add ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %sub = atomicrmw sub ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %and = atomicrmw and ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %or = atomicrmw or ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %xor = atomicrmw xor ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %min = atomicrmw min ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %max = atomicrmw max ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %umin = atomicrmw umin ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %umax = atomicrmw umax ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4

  %fadd = atomicrmw fadd ptr addrspace(7) %p, float %f syncscope("wavefront") seq_cst, align 4
  %fmax = atomicrmw fmax ptr addrspace(7) %p, float %f syncscope("wavefront") seq_cst, align 4
  %fmin = atomicrmw fmin ptr addrspace(7) %p, float %f syncscope("wavefront") seq_cst, align 4

  ; Check a no-return atomic
  atomicrmw add ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4

  ret void
}

; cmpxchg lowers to buffer.atomic.cmpswap; the {value, success} pair is
; reconstructed by comparing the returned old value against the expected one.
; The volatile form carries the aux high bit on the intrinsic call.
define {i32, i1} @cmpxchg(ptr addrspace(8) %buf, i32 %wanted, i32 %new) {
; CHECK-LABEL: define { i32, i1 } @cmpxchg
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[WANTED:%.*]], i32 [[NEW:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[RET:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 [[NEW]], i32 [[WANTED]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i32, i1 } poison, i32 [[RET]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[RET]], [[WANTED]]
; CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { i32, i1 } [[TMP1]], i1 [[TMP2]], 1
; CHECK-NEXT:    ret { i32, i1 } [[TMP3]]
;
  %base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %p = getelementptr i32, ptr addrspace(7) %base, i32 4

  %ret = cmpxchg volatile ptr addrspace(7) %p, i32 %wanted, i32 %new syncscope("wavefront") acq_rel monotonic, align 4
  ret {i32, i1} %ret
}

; Weak cmpxchg: the success bit is left as poison (no icmp is generated),
; since a weak exchange is permitted to fail spuriously.
define {i32, i1} @cmpxchg_weak(ptr addrspace(8) %buf, i32 %wanted, i32 %new) {
; CHECK-LABEL: define { i32, i1 } @cmpxchg_weak
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[WANTED:%.*]], i32 [[NEW:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    fence syncscope("wavefront") release
; CHECK-NEXT:    [[RET:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 [[NEW]], i32 [[WANTED]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT:    fence syncscope("wavefront") acquire
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i32, i1 } poison, i32 [[RET]], 0
; CHECK-NEXT:    ret { i32, i1 } [[TMP1]]
;
  %base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %p = getelementptr i32, ptr addrspace(7) %base, i32 4

  %ret = cmpxchg weak ptr addrspace(7) %p, i32 %wanted, i32 %new syncscope("wavefront") acq_rel monotonic, align 4
  ret {i32, i1} %ret
}

; !0: payload for !nontemporal; !1: empty node used as !invariant.load marker.
!0 = ! { i32 1 }
!1 = ! { }