// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// REQUIRES: x86-registered-target
// REQUIRES: amdgpu-registered-target

// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=CHECK %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=CHECK-SPIRV %s
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=OPT
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=OPT-SPIRV
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -x hip %s -o - | FileCheck -check-prefix=HOST %s

#include "Inputs/cuda.h"

// Coerced struct from `struct S` without all generic pointers lowered into
// global ones.

// On the host-side compilation, generic pointers won't be coerced.

// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel1Pi(
// CHECK-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
// CHECK-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr [[X_ASCAST]], align 8
// CHECK-NEXT: [[X1:%.*]] = load ptr, ptr [[X_ASCAST]], align 8
// CHECK-NEXT: store ptr [[X1]], ptr [[X_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi(
// CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] !max_work_group_size [[META5:![0-9]+]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-SPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
// CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4)
// CHECK-SPIRV-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr addrspace(4) [[X_ASCAST]], align 8
// CHECK-SPIRV-NEXT: [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8
// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 0
// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4
// CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4
// CHECK-SPIRV-NEXT: ret void
//
// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel1Pi(
// OPT-SAME: ptr addrspace(1) noundef captures(none) [[X_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// OPT-NEXT: [[ENTRY:.*:]]
// OPT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4
// OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
// OPT-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi(
// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] !max_work_group_size [[META5:![0-9]+]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4
// OPT-SPIRV-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel1Pi(
// HOST-SAME: ptr noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
// HOST-NEXT: [[ENTRY:.*:]]
// HOST-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8
// HOST-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8
// HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[X_ADDR]], i64 8, i64 0)
// HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
// HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]]
// HOST: [[SETUP_NEXT]]:
// HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel1Pi)
// HOST-NEXT: br label %[[SETUP_END]]
// HOST: [[SETUP_END]]:
// HOST-NEXT: ret void
//
__global__ void kernel1(int *x) {
  x[0]++;
}

// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel2Ri(
// CHECK-SAME: ptr addrspace(1) noundef nonnull align 4 dereferenceable(4) [[X_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
// CHECK-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr [[X_ASCAST]], align 8
// CHECK-NEXT: [[X1:%.*]] = load ptr, ptr [[X_ASCAST]], align 8
// CHECK-NEXT: store ptr [[X1]], ptr [[X_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri(
// CHECK-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-SPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
// CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4)
// CHECK-SPIRV-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr addrspace(4) [[X_ASCAST]], align 8
// CHECK-SPIRV-NEXT: [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8
// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4
// CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP0]], align 4
// CHECK-SPIRV-NEXT: ret void
//
// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel2Ri(
// OPT-SAME: ptr addrspace(1) noundef nonnull align 4 captures(none) dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
// OPT-NEXT: [[ENTRY:.*:]]
// OPT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4
// OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
// OPT-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri(
// OPT-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4
// OPT-SPIRV-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel2Ri(
// HOST-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
// HOST-NEXT: [[ENTRY:.*:]]
// HOST-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8
// HOST-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8
// HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[X_ADDR]], i64 8, i64 0)
// HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
// HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]]
// HOST: [[SETUP_NEXT]]:
// HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel2Ri)
// HOST-NEXT: br label %[[SETUP_END]]
// HOST: [[SETUP_END]]:
// HOST-NEXT: ret void
//
__global__ void kernel2(int &x) {
  x++;
}

// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(
// CHECK-SAME: ptr addrspace(2) noundef [[X:%.*]], ptr addrspace(1) noundef [[Y:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(2), align 8, addrspace(5)
// CHECK-NEXT: [[Y_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
// CHECK-NEXT: [[Y_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[Y_ADDR]] to ptr
// CHECK-NEXT: store ptr addrspace(2) [[X]], ptr [[X_ADDR_ASCAST]], align 8
// CHECK-NEXT: store ptr addrspace(1) [[Y]], ptr [[Y_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(2), ptr [[X_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[TMP0]], i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(2) [[ARRAYIDX]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[Y_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP2]], i64 0
// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[ARRAYIDX1]], align 4
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i(
// CHECK-SPIRV-SAME: ptr addrspace(2) noundef [[X:%.*]], ptr addrspace(1) noundef [[Y:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(2), align 8
// CHECK-SPIRV-NEXT: [[Y_ADDR:%.*]] = alloca ptr addrspace(1), align 8
// CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4)
// CHECK-SPIRV-NEXT: [[Y_ADDR_ASCAST:%.*]] = addrspacecast ptr [[Y_ADDR]] to ptr addrspace(4)
// CHECK-SPIRV-NEXT: store ptr addrspace(2) [[X]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
// CHECK-SPIRV-NEXT: store ptr addrspace(1) [[Y]], ptr addrspace(4) [[Y_ADDR_ASCAST]], align 8
// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(2), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[TMP0]], i64 0
// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(2) [[ARRAYIDX]], align 4
// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[Y_ADDR_ASCAST]], align 8
// CHECK-SPIRV-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP2]], i64 0
// CHECK-SPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[ARRAYIDX1]], align 4
// CHECK-SPIRV-NEXT: ret void
//
// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(
// OPT-SAME: ptr addrspace(2) noundef readonly captures(none) [[X:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
// OPT-NEXT: [[ENTRY:.*:]]
// OPT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(2) [[X]], align 4
// OPT-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[Y]], align 4
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i(
// OPT-SPIRV-SAME: ptr addrspace(2) noundef readonly captures(none) [[X:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] !max_work_group_size [[META5]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(2) [[X]], align 4
// OPT-SPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[Y]], align 4
// OPT-SPIRV-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel3PU3AS2iPU3AS1i(
// HOST-SAME: ptr addrspace(2) noundef [[X:%.*]], ptr addrspace(1) noundef [[Y:%.*]]) #[[ATTR0]] {
// HOST-NEXT: [[ENTRY:.*:]]
// HOST-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(2), align 8
// HOST-NEXT: [[Y_ADDR:%.*]] = alloca ptr addrspace(1), align 8
// HOST-NEXT: store ptr addrspace(2) [[X]], ptr [[X_ADDR]], align 8
// HOST-NEXT: store ptr addrspace(1) [[Y]], ptr [[Y_ADDR]], align 8
// HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[X_ADDR]], i64 8, i64 0)
// HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
// HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]]
// HOST: [[SETUP_NEXT]]:
// HOST-NEXT: [[TMP2:%.*]] = call i32 @hipSetupArgument(ptr [[Y_ADDR]], i64 8, i64 8)
// HOST-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
// HOST-NEXT: br i1 [[TMP3]], label %[[SETUP_NEXT1:.*]], label %[[SETUP_END]]
// HOST: [[SETUP_NEXT1]]:
// HOST-NEXT: [[TMP4:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel3PU3AS2iPU3AS1i)
// HOST-NEXT: br label %[[SETUP_END]]
// HOST: [[SETUP_END]]:
// HOST-NEXT: ret void
//
__global__ void kernel3(__attribute__((address_space(2))) int *x,
                        __attribute__((address_space(1))) int *y) {
  y[0] = x[0];
}

// CHECK-LABEL: define dso_local void @_Z4funcPi(
// CHECK-SAME: ptr noundef [[X:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_func void @_Z4funcPi(
// CHECK-SPIRV-SAME: ptr addrspace(4) noundef [[X:%.*]]) addrspace(4) #[[ATTR1:[0-9]+]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4)
// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[X]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 0
// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4
// CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4
// CHECK-SPIRV-NEXT: ret void
//
// OPT-LABEL: define dso_local void @_Z4funcPi(
// OPT-SAME: ptr noundef captures(none) [[X:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
// OPT-NEXT: [[ENTRY:.*:]]
// OPT-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4
// OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
// OPT-NEXT: store i32 [[INC]], ptr [[X]], align 4
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_func void @_Z4funcPi(
// OPT-SPIRV-SAME: ptr addrspace(4) noundef captures(none) [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[X]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[X]], align 4
// OPT-SPIRV-NEXT: ret void
//
__device__ void func(int *x) {
  x[0]++;
}

struct S {
  int *x;
  float *y;
};
// `by-val` struct is passed by-indirect-alias (a mix of by-ref and indirect
// by-val). However, the enhanced address-inferring pass should be able to
// assume they are global pointers.
// For SPIR-V, since byref is not supported at the moment, we pass it directly.

// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel41S(
// CHECK-SAME: ptr addrspace(4) noundef byref([[STRUCT_S:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_S]], align 8, addrspace(5)
// CHECK-NEXT: [[S:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
// CHECK-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[S]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false)
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 0
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X]], align 8
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
// CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 1
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[Y]], align 8
// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 0
// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
// CHECK-NEXT: [[ADD:%.*]] = fadd contract float [[TMP4]], 1.000000e+00
// CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX1]], align 4
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S(
// CHECK-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 8
// CHECK-SPIRV-NEXT: [[S1:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(4)
// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 0
// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0
// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[TMP1]], ptr addrspace(4) [[TMP0]], align 8
// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 1
// CHECK-SPIRV-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1
// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[TMP3]], ptr addrspace(4) [[TMP2]], align 8
// CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 0
// CHECK-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X]], align 8
// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP4]], i64 0
// CHECK-SPIRV-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4
// CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4
// CHECK-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 1
// CHECK-SPIRV-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8
// CHECK-SPIRV-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP6]], i64 0
// CHECK-SPIRV-NEXT: [[TMP7:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX2]], align 4
// CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP7]], 1.000000e+00
// CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[ARRAYIDX2]], align 4
// CHECK-SPIRV-NEXT: ret void
//
// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel41S(
// OPT-SAME: ptr addrspace(4) noundef readonly byref([[STRUCT_S:%.*]]) align 8 captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
// OPT-NEXT: [[ENTRY:.*:]]
// OPT-NEXT: [[COERCE_SROA_0_0_COPYLOAD:%.*]] = load ptr, ptr addrspace(4) [[TMP0]], align 8, !amdgpu.noclobber [[META4:![0-9]+]]
// OPT-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[COERCE_SROA_0_0_COPYLOAD]] to ptr addrspace(1)
// OPT-NEXT: [[COERCE_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 8
// OPT-NEXT: [[COERCE_SROA_2_0_COPYLOAD:%.*]] = load ptr, ptr addrspace(4) [[COERCE_SROA_2_0__SROA_IDX]], align 8, !amdgpu.noclobber [[META4]]
// OPT-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[COERCE_SROA_2_0_COPYLOAD]] to ptr addrspace(1)
// OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[TMP1]], align 4, !amdgpu.noclobber [[META4]]
// OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1
// OPT-NEXT: store i32 [[INC]], ptr addrspace(1) [[TMP1]], align 4
// OPT-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(1) [[TMP2]], align 4
// OPT-NEXT: [[ADD:%.*]] = fadd contract float [[TMP4]], 1.000000e+00
// OPT-NEXT: store float [[ADD]], ptr addrspace(1) [[TMP2]], align 4
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S(
// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1
// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP0]], align 4
// OPT-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[TMP1]], align 4
// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00
// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP1]], align 4
// OPT-SPIRV-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel41S(
// HOST-SAME: ptr [[S_COERCE0:%.*]], ptr [[S_COERCE1:%.*]]) #[[ATTR0]] {
// HOST-NEXT: [[ENTRY:.*:]]
// HOST-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 8
// HOST-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw { ptr, ptr }, ptr [[S]], i32 0, i32 0
// HOST-NEXT: store ptr [[S_COERCE0]], ptr [[TMP0]], align 8
// HOST-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { ptr, ptr }, ptr [[S]], i32 0, i32 1
// HOST-NEXT: store ptr [[S_COERCE1]], ptr [[TMP1]], align 8
// HOST-NEXT: [[TMP2:%.*]] = call i32 @hipSetupArgument(ptr [[S]], i64 16, i64 0)
// HOST-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
// HOST-NEXT: br i1 [[TMP3]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]]
// HOST: [[SETUP_NEXT]]:
// HOST-NEXT: [[TMP4:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel41S)
// HOST-NEXT: br label %[[SETUP_END]]
// HOST: [[SETUP_END]]:
// HOST-NEXT: ret void
//
__global__ void kernel4(struct S s) {
  s.x[0]++;
  s.y[0] += 1.f;
}

// If a pointer to a struct is passed, only the pointer itself is coerced into a global pointer.

// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel5P1S(
// CHECK-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[S:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S]] to ptr
// CHECK-NEXT: [[S_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S_ADDR]] to ptr
// CHECK-NEXT: store ptr addrspace(1) [[S_COERCE]], ptr [[S_ASCAST]], align 8
// CHECK-NEXT: [[S1:%.*]] = load ptr, ptr [[S_ASCAST]], align 8
// CHECK-NEXT: store ptr [[S1]], ptr [[S_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr [[TMP0]], i32 0, i32 0
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X]], align 8
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
// CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[S_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[TMP3]], i32 0, i32 1
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[Y]], align 8
// CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 0
// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
// CHECK-NEXT: [[ADD:%.*]] = fadd contract float [[TMP5]], 1.000000e+00
// CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S(
// CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[S:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-SPIRV-NEXT: [[S_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-SPIRV-NEXT: [[S_ASCAST:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(4)
// CHECK-SPIRV-NEXT: [[S_ADDR_ASCAST:%.*]] = addrspacecast ptr [[S_ADDR]] to ptr addrspace(4)
// CHECK-SPIRV-NEXT: store ptr addrspace(1) [[S_COERCE]], ptr addrspace(4) [[S_ASCAST]], align 8
// CHECK-SPIRV-NEXT: [[S1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[S_ASCAST]], align 8
// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[S1]], ptr addrspace(4) [[S_ADDR_ASCAST]], align 8
// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[S_ADDR_ASCAST]], align 8
// CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr addrspace(4) [[TMP0]], i32 0, i32 0
// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X]], align 8
// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 0
// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4
// CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
// CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4
// CHECK-SPIRV-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[S_ADDR_ASCAST]], align 8
// CHECK-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[TMP3]], i32 0, i32 1
// CHECK-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8
// CHECK-SPIRV-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP4]], i64 0
// CHECK-SPIRV-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX2]], align 4
// CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP5]], 1.000000e+00
// CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[ARRAYIDX2]], align 4
// CHECK-SPIRV-NEXT: ret void
//
// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel5P1S(
// OPT-SAME: ptr addrspace(1) noundef readonly captures(none) [[S_COERCE:%.*]]) local_unnamed_addr #[[ATTR2]] {
// OPT-NEXT: [[ENTRY:.*:]]
// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(1) [[S_COERCE]], align 8
// OPT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
// OPT-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4
// OPT-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[S_COERCE]], i64 8
// OPT-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(1) [[Y]], align 8
// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4
// OPT-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00
// OPT-NEXT: store float [[ADD]], ptr [[TMP2]], align 4
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S(
// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[S_COERCE]] to i64
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[TMP1]], align 8
// OPT-SPIRV-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1
// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP2]], align 4
// OPT-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP1]], i64 8
// OPT-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8
// OPT-SPIRV-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[TMP4]], align 4
// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP5]], 1.000000e+00
// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP4]], align 4
// OPT-SPIRV-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel5P1S(
// HOST-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
// HOST-NEXT: [[ENTRY:.*:]]
// HOST-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8
// HOST-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8
// HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[S_ADDR]], i64 8, i64 0)
// HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
// HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]]
// HOST: [[SETUP_NEXT]]:
// HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel5P1S)
// HOST-NEXT: br label %[[SETUP_END]]
// HOST: [[SETUP_END]]:
// HOST-NEXT: ret void
//
__global__ void kernel5(struct S *s) {
  s->x[0]++;
  s->y[0] += 1.f;
}

struct T {
  float *x[2];
};
// `by-val` array is passed by-indirect-alias (a mix of by-ref and indirect
// by-val). However, the enhanced address-inferring pass should be able to
// assume they are global pointers.
// For SPIR-V, since byref is not supported at the moment, we pass it directly.

// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel61T(
// CHECK-SAME: ptr addrspace(4) noundef byref([[STRUCT_T:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_T]], align 8, addrspace(5)
// CHECK-NEXT: [[T:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
// CHECK-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[T]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false)
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr [[T]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x ptr], ptr [[X]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 0
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
// CHECK-NEXT: [[ADD:%.*]] = fadd contract float [[TMP2]], 1.000000e+00
// CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX1]], align 4
// CHECK-NEXT: [[X2:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr [[T]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[X2]], i64 0, i64 1
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
// CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 0
// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
// CHECK-NEXT: [[ADD5:%.*]] = fadd contract float [[TMP4]], 2.000000e+00
// CHECK-NEXT: store float [[ADD5]], ptr [[ARRAYIDX4]], align 4
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T(
// CHECK-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[T:%.*]] = alloca [[STRUCT_T]], align 8
// CHECK-SPIRV-NEXT: [[T1:%.*]] = addrspacecast ptr [[T]] to ptr addrspace(4)
// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T1]], i32 0, i32 0
// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0
// CHECK-SPIRV-NEXT: store [2 x ptr addrspace(4)] [[TMP1]], ptr addrspace(4) [[TMP0]], align 8
// CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T1]], i32 0, i32 0
// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x ptr addrspace(4)], ptr addrspace(4) [[X]], i64 0, i64 0
// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX]], align 8
// CHECK-SPIRV-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP2]], i64 0
// CHECK-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX2]], align 4
// CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00
// CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[ARRAYIDX2]], align 4
// CHECK-SPIRV-NEXT: [[X3:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T1]], i32 0, i32 0
// CHECK-SPIRV-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x ptr addrspace(4)], ptr addrspace(4) [[X3]], i64 0, i64 1
// CHECK-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX4]], align 8
// CHECK-SPIRV-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP4]], i64 0
// CHECK-SPIRV-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX5]], align 4
// CHECK-SPIRV-NEXT: [[ADD6:%.*]] = fadd contract float [[TMP5]], 2.000000e+00
// CHECK-SPIRV-NEXT: store float [[ADD6]], ptr addrspace(4) [[ARRAYIDX5]], align 4
// CHECK-SPIRV-NEXT: ret void
//
// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel61T(
// OPT-SAME: ptr addrspace(4) noundef readonly byref([[STRUCT_T:%.*]]) align 8 captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2]] {
// OPT-NEXT: [[ENTRY:.*:]]
// OPT-NEXT: [[COERCE_SROA_0_0_COPYLOAD:%.*]] = load ptr, ptr addrspace(4) [[TMP0]], align 8, !amdgpu.noclobber [[META4]]
// OPT-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[COERCE_SROA_0_0_COPYLOAD]] to ptr addrspace(1)
// OPT-NEXT: [[COERCE_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 8
// OPT-NEXT: [[COERCE_SROA_2_0_COPYLOAD:%.*]] = load ptr, ptr addrspace(4) [[COERCE_SROA_2_0__SROA_IDX]], align 8, !amdgpu.noclobber [[META4]]
// OPT-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[COERCE_SROA_2_0_COPYLOAD]] to ptr addrspace(1)
// OPT-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(1) [[TMP1]], align 4, !amdgpu.noclobber [[META4]]
// OPT-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00
// OPT-NEXT: store float [[ADD]], ptr addrspace(1) [[TMP1]], align 4
// OPT-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(1) [[TMP2]], align 4
// OPT-NEXT: [[ADD5:%.*]] = fadd contract float [[TMP4]], 2.000000e+00
// OPT-NEXT: store float [[ADD5]], ptr addrspace(1) [[TMP2]], align 4
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T(
// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0
// OPT-SPIRV-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 0
// OPT-SPIRV-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 1
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[DOTFCA_0_EXTRACT]], align 4
// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP1]], 1.000000e+00
// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[DOTFCA_0_EXTRACT]], align 4
// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(4) [[DOTFCA_1_EXTRACT]], align 4
// OPT-SPIRV-NEXT: [[ADD6:%.*]] = fadd contract float [[TMP2]], 2.000000e+00
// OPT-SPIRV-NEXT: store float [[ADD6]], ptr addrspace(4) [[DOTFCA_1_EXTRACT]], align 4
// OPT-SPIRV-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel61T(
// HOST-SAME: ptr [[T_COERCE0:%.*]], ptr [[T_COERCE1:%.*]]) #[[ATTR0]] {
// HOST-NEXT: [[ENTRY:.*:]]
// HOST-NEXT: [[T:%.*]] = alloca [[STRUCT_T:%.*]], align 8
// HOST-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw { ptr, ptr }, ptr [[T]], i32 0, i32 0
// HOST-NEXT: store ptr [[T_COERCE0]], ptr [[TMP0]], align 8
// HOST-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { ptr, ptr }, ptr [[T]], i32 0, i32 1
// HOST-NEXT: store ptr [[T_COERCE1]], ptr [[TMP1]], align 8
// HOST-NEXT: [[TMP2:%.*]] = call i32 @hipSetupArgument(ptr [[T]], i64 16, i64 0)
// HOST-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
// HOST-NEXT: br i1 [[TMP3]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]]
// HOST: [[SETUP_NEXT]]:
// HOST-NEXT: [[TMP4:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel61T)
// HOST-NEXT: br label %[[SETUP_END]]
// HOST: [[SETUP_END]]:
// HOST-NEXT: ret void
//
__global__ void kernel6(struct T t) {
  t.x[0][0] += 1.f;
  t.x[1][0] += 2.f;
}

// Check that coerced pointers retain the noalias attribute when qualified with __restrict.

// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel7Pi(
// CHECK-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
// CHECK-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr [[X_ASCAST]], align 8
// CHECK-NEXT: [[X1:%.*]] = load ptr, ptr [[X_ASCAST]], align 8
// CHECK-NEXT: store ptr [[X1]], ptr [[X_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi(
// CHECK-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-SPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
// CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4)
// CHECK-SPIRV-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr addrspace(4) [[X_ASCAST]], align 8
// CHECK-SPIRV-NEXT: [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8
// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 0
// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4
// CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4
// CHECK-SPIRV-NEXT: ret void
//
// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel7Pi(
// OPT-SAME: ptr addrspace(1) noalias noundef captures(none) [[X_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
// OPT-NEXT: [[ENTRY:.*:]]
// OPT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4
// OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
// OPT-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi(
// OPT-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4
// OPT-SPIRV-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel7Pi(
// HOST-SAME: ptr noalias noundef [[X:%.*]]) #[[ATTR0]] {
// HOST-NEXT: [[ENTRY:.*:]]
// HOST-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8
// HOST-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8
// HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[X_ADDR]], i64 8, i64 0)
// HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
// HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]]
// HOST: [[SETUP_NEXT]]:
// HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel7Pi)
// HOST-NEXT: br label %[[SETUP_END]]
// HOST: [[SETUP_END]]:
// HOST-NEXT: ret void
//
__global__ void kernel7(int *__restrict x) {
  x[0]++;
}

// Single-element struct.
struct SS {
  float *x;
};
// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel82SS(
// CHECK-SAME: ptr addrspace(1) [[A_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_SS:%.*]], align 8, addrspace(5)
// CHECK-NEXT: [[A1:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr [[A1]], i32 0, i32 0
// CHECK-NEXT: store ptr addrspace(1) [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr [[A1]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[TMP0]], align 4
// CHECK-NEXT: [[ADD:%.*]] = fadd contract float [[TMP1]], 3.000000e+00
// CHECK-NEXT: store float [[ADD]], ptr [[TMP0]], align 4
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS(
// CHECK-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[A:%.*]] = alloca [[STRUCT_SS]], align 8
// CHECK-SPIRV-NEXT: [[A1:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4)
// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr addrspace(4) [[A1]], i32 0, i32 0
// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0
// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[TMP1]], ptr addrspace(4) [[TMP0]], align 8
// CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr addrspace(4) [[A1]], i32 0, i32 0
// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X]], align 8
// CHECK-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[TMP2]], align 4
// CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 3.000000e+00
// CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP2]], align 4
// CHECK-SPIRV-NEXT: ret void
//
// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel82SS(
// OPT-SAME: ptr addrspace(1) captures(none) [[A_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
// OPT-NEXT: [[ENTRY:.*:]]
// OPT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(1) [[A_COERCE]], align 4
// OPT-NEXT: [[ADD:%.*]] = fadd contract float [[TMP0]], 3.000000e+00
// OPT-NEXT: store float [[ADD]], ptr addrspace(1) [[A_COERCE]], align 4
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS(
// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[TMP0]], align 4
// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP1]], 3.000000e+00
// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP0]], align 4
// OPT-SPIRV-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel82SS(
// HOST-SAME: ptr [[A_COERCE:%.*]]) #[[ATTR0]] {
// HOST-NEXT: [[ENTRY:.*:]]
// HOST-NEXT: [[A:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
// HOST-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr [[A]], i32 0, i32 0
// HOST-NEXT: store ptr [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
// HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[A]], i64 8, i64 0)
// HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
// HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]]
// HOST: [[SETUP_NEXT]]:
// HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel82SS)
// HOST-NEXT: br label %[[SETUP_END]]
// HOST: [[SETUP_END]]:
// HOST-NEXT: ret void
//
__global__ void kernel8(struct SS a) {
  *a.x += 3.f;
}
//.
// CHECK-SPIRV: [[META5]] = !{i32 1024, i32 1, i32 1}
//.
// OPT: [[META4]] = !{}
//.
// OPT-SPIRV: [[META5]] = !{i32 1024, i32 1, i32 1}
//.