// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -x hip \
// RUN:   -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -emit-llvm %s \
// RUN:   -o - | FileCheck %s

// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -x hip \
// RUN:   -aux-triple x86_64-pc-windows-msvc -fcuda-is-device -emit-llvm %s \
// RUN:   -o - | FileCheck %s

#include "Inputs/cuda.h"

// CHECK-LABEL: @_Z16use_dispatch_ptrPi(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[OUT:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT:    [[DISPATCH_PTR:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT:    [[OUT_ASCAST:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(4)
// CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr [[OUT_ADDR]] to ptr addrspace(4)
// CHECK-NEXT:    [[DISPATCH_PTR_ASCAST:%.*]] = addrspacecast ptr [[DISPATCH_PTR]] to ptr addrspace(4)
// CHECK-NEXT:    store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8
// CHECK-NEXT:    [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8
// CHECK-NEXT:    store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = call align 4 dereferenceable(64) addrspace(4) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// CHECK-NEXT:    store ptr addrspace(4) [[TMP0]], ptr addrspace(4) [[DISPATCH_PTR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[DISPATCH_PTR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
// CHECK-NEXT:    [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT:    store i32 [[TMP2]], ptr addrspace(4) [[TMP3]], align 4
// CHECK-NEXT:    ret void
//
__global__ void use_dispatch_ptr(int* out) {
  const int* dispatch_ptr = (const int*)__builtin_amdgcn_dispatch_ptr();
  *out = *dispatch_ptr;
}

// CHECK-LABEL: @_Z13use_queue_ptrPi(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[OUT:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT:    [[QUEUE_PTR:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT:    [[OUT_ASCAST:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(4)
// CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr [[OUT_ADDR]] to ptr addrspace(4)
// CHECK-NEXT:    [[QUEUE_PTR_ASCAST:%.*]] = addrspacecast ptr [[QUEUE_PTR]] to ptr addrspace(4)
// CHECK-NEXT:    store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8
// CHECK-NEXT:    [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8
// CHECK-NEXT:    store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = call addrspace(4) ptr addrspace(4) @llvm.amdgcn.queue.ptr()
// CHECK-NEXT:    store ptr addrspace(4) [[TMP0]], ptr addrspace(4) [[QUEUE_PTR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[QUEUE_PTR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
// CHECK-NEXT:    [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT:    store i32 [[TMP2]], ptr addrspace(4) [[TMP3]], align 4
// CHECK-NEXT:    ret void
//
__global__ void use_queue_ptr(int* out) {
  const int* queue_ptr = (const int*)__builtin_amdgcn_queue_ptr();
  *out = *queue_ptr;
}

// CHECK-LABEL: @_Z19use_implicitarg_ptrPi(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[OUT:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT:    [[OUT_ASCAST:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(4)
// CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr [[OUT_ADDR]] to ptr addrspace(4)
// CHECK-NEXT:    [[IMPLICITARG_PTR_ASCAST:%.*]] = addrspacecast ptr [[IMPLICITARG_PTR]] to ptr addrspace(4)
// CHECK-NEXT:    store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8
// CHECK-NEXT:    [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8
// CHECK-NEXT:    store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = call addrspace(4) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// CHECK-NEXT:    store ptr addrspace(4) [[TMP0]], ptr addrspace(4) [[IMPLICITARG_PTR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[IMPLICITARG_PTR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
// CHECK-NEXT:    [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT:    store i32 [[TMP2]], ptr addrspace(4) [[TMP3]], align 4
// CHECK-NEXT:    ret void
//
__global__ void use_implicitarg_ptr(int* out) {
  const int* implicitarg_ptr = (const int*)__builtin_amdgcn_implicitarg_ptr();
  *out = *implicitarg_ptr;
}

__global__
    //
    void
// CHECK-LABEL: @_Z12test_ds_fmaxf(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[SRC_ADDR:%.*]] = alloca float, align 4
// CHECK-NEXT:    [[X:%.*]] = alloca float, align 4
// CHECK-NEXT:    [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr [[SRC_ADDR]] to ptr addrspace(4)
// CHECK-NEXT:    [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
// CHECK-NEXT:    store float [[SRC:%.*]], ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw fmax ptr addrspace(3) @_ZZ12test_ds_fmaxfE6shared, float [[TMP0]] monotonic, align 4
// CHECK-NEXT:    store volatile float [[TMP1]], ptr addrspace(4) [[X_ASCAST]], align 4
// CHECK-NEXT:    ret void
//
    test_ds_fmax(float src) {
  __shared__ float shared;
  volatile float x = __builtin_amdgcn_ds_fmaxf(&shared, src, 0, 0, false);
}

// CHECK-LABEL: @_Z12test_ds_faddf(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[SRC_ADDR:%.*]] = alloca float, align 4
// CHECK-NEXT:    [[X:%.*]] = alloca float, align 4
// CHECK-NEXT:    [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr [[SRC_ADDR]] to ptr addrspace(4)
// CHECK-NEXT:    [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
// CHECK-NEXT:    store float [[SRC:%.*]], ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw fadd ptr addrspace(3) @_ZZ12test_ds_faddfE6shared, float [[TMP0]] monotonic, align 4
// CHECK-NEXT:    store volatile float [[TMP1]], ptr addrspace(4) [[X_ASCAST]], align 4
// CHECK-NEXT:    ret void
//
__global__ void test_ds_fadd(float src) {
  __shared__ float shared;
  volatile float x = __builtin_amdgcn_ds_faddf(&shared, src, 0, 0, false);
}

// CHECK-LABEL: @_Z12test_ds_fminfPf(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[SHARED:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT:    [[SRC_ADDR:%.*]] = alloca float, align 4
// CHECK-NEXT:    [[SHARED_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT:    [[X:%.*]] = alloca float, align 4
// CHECK-NEXT:    [[SHARED_ASCAST:%.*]] = addrspacecast ptr [[SHARED]] to ptr addrspace(4)
// CHECK-NEXT:    [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr [[SRC_ADDR]] to ptr addrspace(4)
// CHECK-NEXT:    [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast ptr [[SHARED_ADDR]] to ptr addrspace(4)
// CHECK-NEXT:    [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
// CHECK-NEXT:    store ptr addrspace(1) [[SHARED_COERCE:%.*]], ptr addrspace(4) [[SHARED_ASCAST]], align 8
// CHECK-NEXT:    [[SHARED1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ASCAST]], align 8
// CHECK-NEXT:    store float [[SRC:%.*]], ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
// CHECK-NEXT:    store ptr addrspace(4) [[SHARED1]], ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr addrspace(3)
// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
// CHECK-NEXT:    [[TMP3:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP1]], float [[TMP2]] monotonic, align 4
// CHECK-NEXT:    store volatile float [[TMP3]], ptr addrspace(4) [[X_ASCAST]], align 4
// CHECK-NEXT:    ret void
//
__global__ void test_ds_fmin(float src, float *shared) {
  volatile float x = __builtin_amdgcn_ds_fminf(shared, src, 0, 0, false);
}

#if 0 // FIXME: returning a pointer to AS4 explicitly is wrong for AMDGPU SPIRV
//
__device__ void test_ret_builtin_nondef_addrspace() {
  void *x = __builtin_amdgcn_dispatch_ptr();
}
#endif

// CHECK-LABEL: @_Z6endpgmv(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    call addrspace(4) void @llvm.amdgcn.endpgm()
// CHECK-NEXT:    ret void
//
__global__ void endpgm() {
  __builtin_amdgcn_endpgm();
}

// Check that the 64-bit argument is correctly passed to the intrinsic without truncation or assertion.

// CHECK-LABEL: @_Z14test_uicmp_i64Pyyy(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[OUT:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK-NEXT:    [[OUT_ASCAST:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(4)
// CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr [[OUT_ADDR]] to ptr addrspace(4)
// CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4)
// CHECK-NEXT:    [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr [[B_ADDR]] to ptr addrspace(4)
// CHECK-NEXT:    store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8
// CHECK-NEXT:    [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8
// CHECK-NEXT:    store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT:    store i64 [[A:%.*]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT:    store i64 [[B:%.*]], ptr addrspace(4) [[B_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr addrspace(4) [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(4) [[B_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = call addrspace(4) i64 @llvm.amdgcn.icmp.i64.i64(i64 [[TMP0]], i64 [[TMP1]], i32 35)
// CHECK-NEXT:    [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT:    store i64 [[TMP2]], ptr addrspace(4) [[TMP3]], align 8
// CHECK-NEXT:    ret void
//
__global__ void test_uicmp_i64(unsigned long long *out, unsigned long long a, unsigned long long b)
{
  *out = __builtin_amdgcn_uicmpl(a, b, 30+5);
}

// Check that the 64-bit return value is correctly returned without truncation or assertion.

// CHECK-LABEL: @_Z14test_s_memtimePy(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[OUT:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT:    [[OUT_ASCAST:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(4)
// CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr [[OUT_ADDR]] to ptr addrspace(4)
// CHECK-NEXT:    store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8
// CHECK-NEXT:    [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8
// CHECK-NEXT:    store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = call addrspace(4) i64 @llvm.amdgcn.s.memtime()
// CHECK-NEXT:    [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT:    store i64 [[TMP0]], ptr addrspace(4) [[TMP1]], align 8
// CHECK-NEXT:    ret void
//
__global__ void test_s_memtime(unsigned long long* out)
{
  *out = __builtin_amdgcn_s_memtime();
}

// Check that a generic pointer can be passed both as a shared pointer and as a generic pointer.
__device__ void func(float *x);

// CHECK-LABEL: @_Z17test_ds_fmin_funcfPf(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[SHARED:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT:    [[SRC_ADDR:%.*]] = alloca float, align 4
// CHECK-NEXT:    [[SHARED_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT:    [[X:%.*]] = alloca float, align 4
// CHECK-NEXT:    [[SHARED_ASCAST:%.*]] = addrspacecast ptr [[SHARED]] to ptr addrspace(4)
// CHECK-NEXT:    [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr [[SRC_ADDR]] to ptr addrspace(4)
// CHECK-NEXT:    [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast ptr [[SHARED_ADDR]] to ptr addrspace(4)
// CHECK-NEXT:    [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
// CHECK-NEXT:    store ptr addrspace(1) [[SHARED_COERCE:%.*]], ptr addrspace(4) [[SHARED_ASCAST]], align 8
// CHECK-NEXT:    [[SHARED1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ASCAST]], align 8
// CHECK-NEXT:    store float [[SRC:%.*]], ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
// CHECK-NEXT:    store ptr addrspace(4) [[SHARED1]], ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr addrspace(3)
// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
// CHECK-NEXT:    [[TMP3:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP1]], float [[TMP2]] monotonic, align 4
// CHECK-NEXT:    store volatile float [[TMP3]], ptr addrspace(4) [[X_ASCAST]], align 4
// CHECK-NEXT:    [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
// CHECK-NEXT:    call spir_func addrspace(4) void @_Z4funcPf(ptr addrspace(4) noundef [[TMP4]]) #[[ATTR6:[0-9]+]]
// CHECK-NEXT:    ret void
//
__global__ void test_ds_fmin_func(float src, float *__restrict shared) {
  volatile float x = __builtin_amdgcn_ds_fminf(shared, src, 0, 0, false);
  func(shared);
}

// CHECK-LABEL: @_Z14test_is_sharedPf(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[X:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT:    [[RET:%.*]] = alloca i8, align 1
// CHECK-NEXT:    [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
// CHECK-NEXT:    [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4)
// CHECK-NEXT:    [[RET_ASCAST:%.*]] = addrspacecast ptr [[RET]] to ptr addrspace(4)
// CHECK-NEXT:    store ptr addrspace(1) [[X_COERCE:%.*]], ptr addrspace(4) [[X_ASCAST]], align 8
// CHECK-NEXT:    [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8
// CHECK-NEXT:    store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
// CHECK-NEXT:    [[TMP2:%.*]] = call addrspace(4) i1 @llvm.amdgcn.is.shared(ptr [[TMP1]])
// CHECK-NEXT:    [[STOREDV:%.*]] = zext i1 [[TMP2]] to i8
// CHECK-NEXT:    store i8 [[STOREDV]], ptr addrspace(4) [[RET_ASCAST]], align 1
// CHECK-NEXT:    ret void
//
__global__ void test_is_shared(float *x){
  bool ret = __builtin_amdgcn_is_shared(x);
}

// CHECK-LABEL: @_Z15test_is_privatePi(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[X:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT:    [[RET:%.*]] = alloca i8, align 1
// CHECK-NEXT:    [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
// CHECK-NEXT:    [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4)
// CHECK-NEXT:    [[RET_ASCAST:%.*]] = addrspacecast ptr [[RET]] to ptr addrspace(4)
// CHECK-NEXT:    store ptr addrspace(1) [[X_COERCE:%.*]], ptr addrspace(4) [[X_ASCAST]], align 8
// CHECK-NEXT:    [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8
// CHECK-NEXT:    store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
// CHECK-NEXT:    [[TMP2:%.*]] = call addrspace(4) i1 @llvm.amdgcn.is.private(ptr [[TMP1]])
// CHECK-NEXT:    [[STOREDV:%.*]] = zext i1 [[TMP2]] to i8
// CHECK-NEXT:    store i8 [[STOREDV]], ptr addrspace(4) [[RET_ASCAST]], align 1
// CHECK-NEXT:    ret void
//
__global__ void test_is_private(int *x){
  bool ret = __builtin_amdgcn_is_private(x);
}