// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -no-enable-noundef-analysis -verify -Wno-vla -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -no-enable-noundef-analysis -verify -Wno-vla -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -no-enable-noundef-analysis -verify -Wno-vla -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -no-enable-noundef-analysis -verify -Wno-vla -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2
// RUN: %clang_cc1 -no-enable-noundef-analysis -verify -Wno-vla -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2
// expected-no-diagnostics

#ifndef HEADER
#define HEADER

__thread int id; // TLS global; declared but never referenced below.

int baz(int f, double &a); // Forward declaration; defined near the end of this file.

template <typename tx, typename ty>
struct TT { // Aggregate used to exercise member and operator[] access inside a target region.
  tx X;
  ty Y;
  tx &operator[](int i) { return X; } // NOTE(review): always returns X regardless of i - intentional test input.
};

void targetBar(int *Ptr1, int *Ptr2) { // Target region with a zero-length array section map plus a nested parallel.
#pragma omp target map(Ptr1[:0], Ptr2)
#pragma omp parallel num_threads(2)
  *Ptr1 = *Ptr2;
}

int foo(int n) { // Exercises empty, if(0)/if(1), and conditionally offloaded regions with VLAs.
  int a = 0;
  short aa = 0;
  float b[10];
  float bn[n]; // VLA (hence -Wno-vla in the RUN lines).
  double c[5][10];
  double cn[5][n]; // 2-D array with a variable trailing dimension.
  TT<long long, char> d;

#pragma omp target // Empty target region (kernel at line 39 in the checks).
  {
  }

#pragma omp target if (0) // if(0): runs on the host; no device kernel is checked for this region.
  {
  }

#pragma omp target if (1) // if(1): always offloaded; captures only 'aa'.
  {
    aa += 1;
    aa += 2;
  }

#pragma omp target if (n > 20) // Captures scalars, fixed arrays, VLAs, and the struct 'd'.
  {
    a += 1;
    b[2] += 1.0;
    bn[3] += 1.0;
    c[1][2] += 1.0;
    cn[1][3] += 1.0;
    d.X += 1;
    d.Y += 1;
    d[0] += 1; // Calls TT::operator[] on the device (see _ZN2TTIxcEixEi in the checks).
  }

  return a;
}

template <typename tx>
tx ftemplate(int n) { // Only the ftemplate<int> instantiation (from bar) is code-generated.
  tx a = 0;
  short aa = 0;
  tx b[10];

#pragma omp target if (n > 40)
  {
    a += 1;
    aa += 1;
    b[2] += 1;
  }

  return a;
}

static int fstatic(int n) { // Internal linkage; mangled as __ZL7fstatici in the checked kernel name.
  int a = 0;
  short aa = 0;
  char aaa = 0;
  int b[10];

#pragma omp target if (n > 50) // Captures i32, i16, and i8 scalars plus a fixed array.
  {
    a += 1;
    aa += 1;
    aaa += 1;
    b[2] += 1;
  }

  return a;
}

struct S1 {
  double a;

  int r1(int n) { // Member-function target region: captures 'this', a scalar, and a VLA of short.
    int b = n + 1;
    short int c[2][n];

#pragma omp target if (n > 60)
    {
      this->a = (double)b + 1.5;
      c[1][1] = ++a;
      baz(a, a); // Device-side call into baz, which spawns a parallel region.
    }

    return c[1][1] + (int)b;
  }
};

int bar(int n) { // Host-side driver that instantiates/calls every offloading function above.
  int a = 0;

  a += foo(n);

  S1 S;
  a += S.r1(n);

  a += fstatic(n);

  a += ftemplate<int>(n);

  return a;
}

int baz(int f, double &a) { // Reached from the target region in S1::r1; 'f' is shared into the parallel region.
#pragma omp parallel
  f = 2 + a;
  return f;
}

extern void assert(int) throw() __attribute__((__noreturn__)); // noreturn, so the target region below ends in 'unreachable'.
void unreachable_call() {
#pragma omp target
  assert(0);
}

#endif
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25
// CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr [[PTR1:%.*]], ptr nonnull align 8 dereferenceable(8) [[PTR2:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 8
152 // CHECK1-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8 153 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 154 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 155 // CHECK1-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 8 156 // CHECK1-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 8 157 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 8 158 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_kernel_environment, ptr [[DYN_PTR]]) 159 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 160 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 161 // CHECK1: user_code.entry: 162 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) 163 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 164 // CHECK1-NEXT: store ptr [[PTR1_ADDR]], ptr [[TMP3]], align 8 165 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 166 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 167 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) 168 // CHECK1-NEXT: call void @__kmpc_target_deinit() 169 // CHECK1-NEXT: ret void 170 // CHECK1: worker.exit: 171 // CHECK1-NEXT: ret void 172 // 173 // 174 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_omp_outlined 175 // CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 8 dereferenceable(8) [[PTR1:%.*]], ptr nonnull align 8 dereferenceable(8) [[PTR2:%.*]]) #[[ATTR1:[0-9]+]] { 176 // CHECK1-NEXT: entry: 177 // CHECK1-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 178 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 179 // CHECK1-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 8 180 // CHECK1-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8 181 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 182 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 183 // CHECK1-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 8 184 // CHECK1-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 8 185 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR1_ADDR]], align 8 186 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 8 187 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 188 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 189 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 8 190 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 191 // CHECK1-NEXT: ret void 192 // 193 // 194 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39 195 // CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]]) #[[ATTR4:[0-9]+]] { 196 // CHECK1-NEXT: entry: 197 // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 198 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 199 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39_kernel_environment, ptr [[DYN_PTR]]) 200 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 201 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 202 // CHECK1: user_code.entry: 203 // CHECK1-NEXT: call void @__kmpc_target_deinit() 204 // CHECK1-NEXT: ret void 205 // CHECK1: worker.exit: 206 // CHECK1-NEXT: ret void 207 // 208 // 209 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47 210 // CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]], i64 [[AA:%.*]]) 
#[[ATTR4]] { 211 // CHECK1-NEXT: entry: 212 // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 213 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 214 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 215 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 216 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47_kernel_environment, ptr [[DYN_PTR]]) 217 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 218 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 219 // CHECK1: user_code.entry: 220 // CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 221 // CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 222 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 223 // CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 224 // CHECK1-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 225 // CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 226 // CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 227 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 2 228 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 229 // CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 230 // CHECK1-NEXT: call void @__kmpc_target_deinit() 231 // CHECK1-NEXT: ret void 232 // CHECK1: worker.exit: 233 // CHECK1-NEXT: ret void 234 // 235 // 236 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53 237 // CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]], i64 [[A:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]], i64 [[VLA:%.*]], ptr nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr nonnull align 8 dereferenceable(400) [[C:%.*]], i64 [[VLA1:%.*]], i64 [[VLA3:%.*]], ptr nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR4]] { 238 // CHECK1-NEXT: entry: 239 // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = 
alloca ptr, align 8 240 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 241 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 242 // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 243 // CHECK1-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 244 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 245 // CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 246 // CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 247 // CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 248 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 249 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 250 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 251 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 252 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 253 // CHECK1-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 254 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 255 // CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 256 // CHECK1-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 257 // CHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 258 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 259 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 260 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 261 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 262 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 263 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 264 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 265 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 266 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 267 // CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53_kernel_environment, ptr [[DYN_PTR]]) 268 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1 269 // CHECK1-NEXT: br 
i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 270 // CHECK1: user_code.entry: 271 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 272 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 273 // CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 274 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 275 // CHECK1-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4 276 // CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP10]] to double 277 // CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 278 // CHECK1-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float 279 // CHECK1-NEXT: store float [[CONV6]], ptr [[ARRAYIDX]], align 4 280 // CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 281 // CHECK1-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 282 // CHECK1-NEXT: [[CONV8:%.*]] = fpext float [[TMP11]] to double 283 // CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 284 // CHECK1-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float 285 // CHECK1-NEXT: store float [[CONV10]], ptr [[ARRAYIDX7]], align 4 286 // CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 287 // CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i64 0, i64 2 288 // CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 289 // CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[TMP12]], 1.000000e+00 290 // CHECK1-NEXT: store double [[ADD13]], ptr [[ARRAYIDX12]], align 8 291 // CHECK1-NEXT: [[TMP13:%.*]] = mul nsw i64 1, [[TMP5]] 292 // CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP13]] 293 // CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i64 3 294 // CHECK1-NEXT: [[TMP14:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 295 // CHECK1-NEXT: 
[[ADD16:%.*]] = fadd double [[TMP14]], 1.000000e+00 296 // CHECK1-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 297 // CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 298 // CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[X]], align 8 299 // CHECK1-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP15]], 1 300 // CHECK1-NEXT: store i64 [[ADD17]], ptr [[X]], align 8 301 // CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 302 // CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[Y]], align 8 303 // CHECK1-NEXT: [[CONV18:%.*]] = sext i8 [[TMP16]] to i32 304 // CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 305 // CHECK1-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 306 // CHECK1-NEXT: store i8 [[CONV20]], ptr [[Y]], align 8 307 // CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(8) ptr @_ZN2TTIxcEixEi(ptr nonnull align 8 dereferenceable(16) [[TMP7]], i32 0) #[[ATTR10:[0-9]+]] 308 // CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[CALL]], align 8 309 // CHECK1-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP17]], 1 310 // CHECK1-NEXT: store i64 [[ADD21]], ptr [[CALL]], align 8 311 // CHECK1-NEXT: call void @__kmpc_target_deinit() 312 // CHECK1-NEXT: ret void 313 // CHECK1: worker.exit: 314 // CHECK1-NEXT: ret void 315 // 316 // 317 // CHECK1-LABEL: define {{[^@]+}}@_ZN2TTIxcEixEi 318 // CHECK1-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[I:%.*]]) #[[ATTR5:[0-9]+]] comdat align 2 { 319 // CHECK1-NEXT: entry: 320 // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 321 // CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 322 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 323 // CHECK1-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 324 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 325 // CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT:%.*]], ptr [[THIS1]], i32 0, i32 0 326 // CHECK1-NEXT: ret 
ptr [[X]] 327 // 328 // 329 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90 330 // CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]], i64 [[A:%.*]], i64 [[AA:%.*]], i64 [[AAA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] { 331 // CHECK1-NEXT: entry: 332 // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 333 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 334 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 335 // CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 336 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 337 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 338 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 339 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 340 // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 341 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 342 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 343 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90_kernel_environment, ptr [[DYN_PTR]]) 344 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 345 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 346 // CHECK1: user_code.entry: 347 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 348 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 349 // CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 350 // CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 351 // CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 352 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 353 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 354 // CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 355 // CHECK1-NEXT: [[TMP4:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 356 // CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to 
i32 357 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 358 // CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 359 // CHECK1-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1 360 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 361 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 362 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 363 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 364 // CHECK1-NEXT: call void @__kmpc_target_deinit() 365 // CHECK1-NEXT: ret void 366 // CHECK1: worker.exit: 367 // CHECK1-NEXT: ret void 368 // 369 // 370 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108 371 // CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr [[THIS:%.*]], i64 [[B:%.*]], i64 [[VLA:%.*]], i64 [[VLA1:%.*]], ptr nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR4]] { 372 // CHECK1-NEXT: entry: 373 // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 374 // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 375 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 376 // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 377 // CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 378 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 379 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 380 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 381 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 382 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 383 // CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 384 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 385 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 386 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 387 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 388 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 389 // CHECK1-NEXT: 
[[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108_kernel_environment, ptr [[DYN_PTR]]) 390 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 391 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 392 // CHECK1: user_code.entry: 393 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 394 // CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double 395 // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 396 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 397 // CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8 398 // CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 399 // CHECK1-NEXT: [[TMP6:%.*]] = load double, ptr [[A3]], align 8 400 // CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 401 // CHECK1-NEXT: store double [[INC]], ptr [[A3]], align 8 402 // CHECK1-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 403 // CHECK1-NEXT: [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]] 404 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP7]] 405 // CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 406 // CHECK1-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 407 // CHECK1-NEXT: [[A6:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 408 // CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[A6]], align 8 409 // CHECK1-NEXT: [[CONV7:%.*]] = fptosi double [[TMP8]] to i32 410 // CHECK1-NEXT: [[A8:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 411 // CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV7]], ptr nonnull align 8 dereferenceable(8) [[A8]]) #[[ATTR10]] 412 // CHECK1-NEXT: call void @__kmpc_target_deinit() 413 // CHECK1-NEXT: ret void 414 // CHECK1: worker.exit: 415 // CHECK1-NEXT: ret 
void 416 // 417 // 418 // CHECK1-LABEL: define {{[^@]+}}@_Z3baziRd 419 // CHECK1-SAME: (i32 [[F1:%.*]], ptr nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR5]] { 420 // CHECK1-NEXT: entry: 421 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 422 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 423 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) 424 // CHECK1-NEXT: [[F:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) 425 // CHECK1-NEXT: store i32 [[F1]], ptr [[F]], align 4 426 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 427 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 428 // CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 429 // CHECK1-NEXT: store ptr [[F]], ptr [[TMP2]], align 8 430 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 431 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 432 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3baziRd_omp_outlined, ptr @_Z3baziRd_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 2) 433 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[F]], align 4 434 // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[F]], i64 4) 435 // CHECK1-NEXT: ret i32 [[TMP4]] 436 // 437 // 438 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142 439 // CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]]) #[[ATTR4]] { 440 // CHECK1-NEXT: entry: 441 // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 442 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 443 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142_kernel_environment, ptr [[DYN_PTR]]) 444 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 445 // 
CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 446 // CHECK1: user_code.entry: 447 // CHECK1-NEXT: call void @_Z6asserti(i32 0) #[[ATTR11:[0-9]+]] 448 // CHECK1-NEXT: unreachable 449 // CHECK1: worker.exit: 450 // CHECK1-NEXT: ret void 451 // CHECK1: 1: 452 // CHECK1-NEXT: call void @__kmpc_target_deinit() 453 // CHECK1-NEXT: ret void 454 // 455 // 456 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 457 // CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]], i64 [[A:%.*]], i64 [[AA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] { 458 // CHECK1-NEXT: entry: 459 // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 460 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 461 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 462 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 463 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 464 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 465 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 466 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 467 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 468 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74_kernel_environment, ptr [[DYN_PTR]]) 469 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 470 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 471 // CHECK1: user_code.entry: 472 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 473 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 474 // CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 475 // CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 476 // CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 477 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 478 // 
CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 479 // CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 480 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 481 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 482 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP4]], 1 483 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 484 // CHECK1-NEXT: call void @__kmpc_target_deinit() 485 // CHECK1-NEXT: ret void 486 // CHECK1: worker.exit: 487 // CHECK1-NEXT: ret void 488 // 489 // 490 // CHECK1-LABEL: define {{[^@]+}}@_Z3baziRd_omp_outlined 491 // CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[F:%.*]], ptr nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR1]] { 492 // CHECK1-NEXT: entry: 493 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 494 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 495 // CHECK1-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 8 496 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 497 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 498 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 499 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 500 // CHECK1-NEXT: store ptr [[F]], ptr [[F_ADDR]], align 8 501 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 502 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[F_ADDR]], align 8 503 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 504 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 505 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 506 // CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8 507 // CHECK1-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]] 508 // CHECK1-NEXT: [[CONV:%.*]] = fptosi double [[ADD]] to i32 509 // CHECK1-NEXT: store i32 [[CONV]], ptr [[TMP0]], align 4 510 
// CHECK1-NEXT: ret void 511 // 512 // 513 // CHECK1-LABEL: define {{[^@]+}}@_Z3baziRd_omp_outlined_wrapper 514 // CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR8:[0-9]+]] { 515 // CHECK1-NEXT: entry: 516 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 517 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 518 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 519 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 520 // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 521 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 522 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 523 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 524 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 525 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 526 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 527 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1 528 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 529 // CHECK1-NEXT: call void @_Z3baziRd_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR2:[0-9]+]] 530 // CHECK1-NEXT: ret void 531 // 532 // 533 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25 534 // CHECK2-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr [[PTR1:%.*]], ptr nonnull align 4 dereferenceable(4) [[PTR2:%.*]]) #[[ATTR0:[0-9]+]] { 535 // CHECK2-NEXT: entry: 536 // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 537 // CHECK2-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 4 538 // CHECK2-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 4 539 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 540 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 541 // CHECK2-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 4 542 // CHECK2-NEXT: store ptr [[PTR2]], 
ptr [[PTR2_ADDR]], align 4 543 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 4 544 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_kernel_environment, ptr [[DYN_PTR]]) 545 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 546 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 547 // CHECK2: user_code.entry: 548 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) 549 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 550 // CHECK2-NEXT: store ptr [[PTR1_ADDR]], ptr [[TMP3]], align 4 551 // CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 552 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 553 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) 554 // CHECK2-NEXT: call void @__kmpc_target_deinit() 555 // CHECK2-NEXT: ret void 556 // CHECK2: worker.exit: 557 // CHECK2-NEXT: ret void 558 // 559 // 560 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_omp_outlined 561 // CHECK2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[PTR1:%.*]], ptr nonnull align 4 dereferenceable(4) [[PTR2:%.*]]) #[[ATTR1:[0-9]+]] { 562 // CHECK2-NEXT: entry: 563 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 564 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 565 // CHECK2-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 4 566 // CHECK2-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 4 567 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], 
align 4 568 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 569 // CHECK2-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 4 570 // CHECK2-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 4 571 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR1_ADDR]], align 4 572 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 4 573 // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 574 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 575 // CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 4 576 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 577 // CHECK2-NEXT: ret void 578 // 579 // 580 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39 581 // CHECK2-SAME: (ptr noalias [[DYN_PTR:%.*]]) #[[ATTR4:[0-9]+]] { 582 // CHECK2-NEXT: entry: 583 // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 584 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 585 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39_kernel_environment, ptr [[DYN_PTR]]) 586 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 587 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 588 // CHECK2: user_code.entry: 589 // CHECK2-NEXT: call void @__kmpc_target_deinit() 590 // CHECK2-NEXT: ret void 591 // CHECK2: worker.exit: 592 // CHECK2-NEXT: ret void 593 // 594 // 595 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47 596 // CHECK2-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[AA:%.*]]) #[[ATTR4]] { 597 // CHECK2-NEXT: entry: 598 // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 599 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 600 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 601 // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 602 // CHECK2-NEXT: 
[[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47_kernel_environment, ptr [[DYN_PTR]]) 603 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 604 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 605 // CHECK2: user_code.entry: 606 // CHECK2-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 607 // CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 608 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 609 // CHECK2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 610 // CHECK2-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 611 // CHECK2-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 612 // CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 613 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 2 614 // CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 615 // CHECK2-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 616 // CHECK2-NEXT: call void @__kmpc_target_deinit() 617 // CHECK2-NEXT: ret void 618 // CHECK2: worker.exit: 619 // CHECK2-NEXT: ret void 620 // 621 // 622 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53 623 // CHECK2-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[A:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]], i32 [[VLA:%.*]], ptr nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr nonnull align 8 dereferenceable(400) [[C:%.*]], i32 [[VLA1:%.*]], i32 [[VLA3:%.*]], ptr nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR4]] { 624 // CHECK2-NEXT: entry: 625 // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 626 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 627 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 628 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 629 // CHECK2-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 630 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 631 // 
CHECK2-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 632 // CHECK2-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 633 // CHECK2-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 634 // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 635 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 636 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 637 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 638 // CHECK2-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 639 // CHECK2-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 640 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 641 // CHECK2-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 642 // CHECK2-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 643 // CHECK2-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 644 // CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 645 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 646 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 647 // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 648 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 649 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 650 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 651 // CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 652 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 653 // CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53_kernel_environment, ptr [[DYN_PTR]]) 654 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1 655 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 656 // CHECK2: user_code.entry: 657 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 658 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 659 // CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 660 // 
CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 661 // CHECK2-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4 662 // CHECK2-NEXT: [[CONV:%.*]] = fpext float [[TMP10]] to double 663 // CHECK2-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 664 // CHECK2-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float 665 // CHECK2-NEXT: store float [[CONV6]], ptr [[ARRAYIDX]], align 4 666 // CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 667 // CHECK2-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 668 // CHECK2-NEXT: [[CONV8:%.*]] = fpext float [[TMP11]] to double 669 // CHECK2-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 670 // CHECK2-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float 671 // CHECK2-NEXT: store float [[CONV10]], ptr [[ARRAYIDX7]], align 4 672 // CHECK2-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 673 // CHECK2-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i32 0, i32 2 674 // CHECK2-NEXT: [[TMP12:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 675 // CHECK2-NEXT: [[ADD13:%.*]] = fadd double [[TMP12]], 1.000000e+00 676 // CHECK2-NEXT: store double [[ADD13]], ptr [[ARRAYIDX12]], align 8 677 // CHECK2-NEXT: [[TMP13:%.*]] = mul nsw i32 1, [[TMP5]] 678 // CHECK2-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP13]] 679 // CHECK2-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i32 3 680 // CHECK2-NEXT: [[TMP14:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 681 // CHECK2-NEXT: [[ADD16:%.*]] = fadd double [[TMP14]], 1.000000e+00 682 // CHECK2-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 683 // CHECK2-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 684 // CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[X]], align 8 685 // 
CHECK2-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP15]], 1 686 // CHECK2-NEXT: store i64 [[ADD17]], ptr [[X]], align 8 687 // CHECK2-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 688 // CHECK2-NEXT: [[TMP16:%.*]] = load i8, ptr [[Y]], align 8 689 // CHECK2-NEXT: [[CONV18:%.*]] = sext i8 [[TMP16]] to i32 690 // CHECK2-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 691 // CHECK2-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 692 // CHECK2-NEXT: store i8 [[CONV20]], ptr [[Y]], align 8 693 // CHECK2-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(8) ptr @_ZN2TTIxcEixEi(ptr nonnull align 8 dereferenceable(16) [[TMP7]], i32 0) #[[ATTR10:[0-9]+]] 694 // CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[CALL]], align 8 695 // CHECK2-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP17]], 1 696 // CHECK2-NEXT: store i64 [[ADD21]], ptr [[CALL]], align 8 697 // CHECK2-NEXT: call void @__kmpc_target_deinit() 698 // CHECK2-NEXT: ret void 699 // CHECK2: worker.exit: 700 // CHECK2-NEXT: ret void 701 // 702 // 703 // CHECK2-LABEL: define {{[^@]+}}@_ZN2TTIxcEixEi 704 // CHECK2-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[I:%.*]]) #[[ATTR5:[0-9]+]] comdat align 2 { 705 // CHECK2-NEXT: entry: 706 // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 707 // CHECK2-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 708 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 709 // CHECK2-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 710 // CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 711 // CHECK2-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT:%.*]], ptr [[THIS1]], i32 0, i32 0 712 // CHECK2-NEXT: ret ptr [[X]] 713 // 714 // 715 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90 716 // CHECK2-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[A:%.*]], i32 [[AA:%.*]], i32 [[AAA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] { 717 // CHECK2-NEXT: 
entry: 718 // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 719 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 720 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 721 // CHECK2-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 722 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 723 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 724 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 725 // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 726 // CHECK2-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 727 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 728 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 729 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90_kernel_environment, ptr [[DYN_PTR]]) 730 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 731 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 732 // CHECK2: user_code.entry: 733 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 734 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 735 // CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 736 // CHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 737 // CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 738 // CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 739 // CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 740 // CHECK2-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 741 // CHECK2-NEXT: [[TMP4:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 742 // CHECK2-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32 743 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 744 // CHECK2-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 745 // CHECK2-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1 746 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 747 // 
CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 748 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 749 // CHECK2-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 750 // CHECK2-NEXT: call void @__kmpc_target_deinit() 751 // CHECK2-NEXT: ret void 752 // CHECK2: worker.exit: 753 // CHECK2-NEXT: ret void 754 // 755 // 756 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108 757 // CHECK2-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr [[THIS:%.*]], i32 [[B:%.*]], i32 [[VLA:%.*]], i32 [[VLA1:%.*]], ptr nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR4]] { 758 // CHECK2-NEXT: entry: 759 // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 760 // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 761 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 762 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 763 // CHECK2-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 764 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 765 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 766 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 767 // CHECK2-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 768 // CHECK2-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 769 // CHECK2-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 770 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 771 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 772 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 773 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 774 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 775 // CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108_kernel_environment, ptr [[DYN_PTR]]) 776 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 777 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label 
[[WORKER_EXIT:%.*]] 778 // CHECK2: user_code.entry: 779 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 780 // CHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double 781 // CHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 782 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 783 // CHECK2-NEXT: store double [[ADD]], ptr [[A]], align 8 784 // CHECK2-NEXT: [[A3:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 785 // CHECK2-NEXT: [[TMP6:%.*]] = load double, ptr [[A3]], align 8 786 // CHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 787 // CHECK2-NEXT: store double [[INC]], ptr [[A3]], align 8 788 // CHECK2-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 789 // CHECK2-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP2]] 790 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP7]] 791 // CHECK2-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 792 // CHECK2-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 793 // CHECK2-NEXT: [[A6:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 794 // CHECK2-NEXT: [[TMP8:%.*]] = load double, ptr [[A6]], align 8 795 // CHECK2-NEXT: [[CONV7:%.*]] = fptosi double [[TMP8]] to i32 796 // CHECK2-NEXT: [[A8:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 797 // CHECK2-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV7]], ptr nonnull align 8 dereferenceable(8) [[A8]]) #[[ATTR10]] 798 // CHECK2-NEXT: call void @__kmpc_target_deinit() 799 // CHECK2-NEXT: ret void 800 // CHECK2: worker.exit: 801 // CHECK2-NEXT: ret void 802 // 803 // 804 // CHECK2-LABEL: define {{[^@]+}}@_Z3baziRd 805 // CHECK2-SAME: (i32 [[F1:%.*]], ptr nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR5]] { 806 // CHECK2-NEXT: entry: 807 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 808 // CHECK2-NEXT: 
[[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 809 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) 810 // CHECK2-NEXT: [[F:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) 811 // CHECK2-NEXT: store i32 [[F1]], ptr [[F]], align 4 812 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 813 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 814 // CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 815 // CHECK2-NEXT: store ptr [[F]], ptr [[TMP2]], align 4 816 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 817 // CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 818 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3baziRd_omp_outlined, ptr @_Z3baziRd_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 2) 819 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[F]], align 4 820 // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[F]], i32 4) 821 // CHECK2-NEXT: ret i32 [[TMP4]] 822 // 823 // 824 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142 825 // CHECK2-SAME: (ptr noalias [[DYN_PTR:%.*]]) #[[ATTR4]] { 826 // CHECK2-NEXT: entry: 827 // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 828 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 829 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142_kernel_environment, ptr [[DYN_PTR]]) 830 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 831 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 832 // CHECK2: user_code.entry: 833 // CHECK2-NEXT: call void @_Z6asserti(i32 0) #[[ATTR11:[0-9]+]] 834 // CHECK2-NEXT: unreachable 835 // CHECK2: worker.exit: 836 // CHECK2-NEXT: ret 
void 837 // CHECK2: 1: 838 // CHECK2-NEXT: call void @__kmpc_target_deinit() 839 // CHECK2-NEXT: ret void 840 // 841 // 842 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 843 // CHECK2-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[A:%.*]], i32 [[AA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] { 844 // CHECK2-NEXT: entry: 845 // CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 846 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 847 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 848 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 849 // CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 850 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 851 // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 852 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 853 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 854 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74_kernel_environment, ptr [[DYN_PTR]]) 855 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 856 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 857 // CHECK2: user_code.entry: 858 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 859 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 860 // CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 861 // CHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 862 // CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 863 // CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 864 // CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 865 // CHECK2-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 866 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 867 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[ARRAYIDX]], align 4 868 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP4]], 1 869 // CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 870 // CHECK2-NEXT: call void @__kmpc_target_deinit() 871 // CHECK2-NEXT: ret void 872 // CHECK2: worker.exit: 873 // CHECK2-NEXT: ret void 874 // 875 // 876 // CHECK2-LABEL: define {{[^@]+}}@_Z3baziRd_omp_outlined 877 // CHECK2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[F:%.*]], ptr nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR1]] { 878 // CHECK2-NEXT: entry: 879 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 880 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 881 // CHECK2-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 4 882 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 883 // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 4 884 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 885 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 886 // CHECK2-NEXT: store ptr [[F]], ptr [[F_ADDR]], align 4 887 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 888 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[F_ADDR]], align 4 889 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 890 // CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 891 // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4 892 // CHECK2-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8 893 // CHECK2-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]] 894 // CHECK2-NEXT: [[CONV:%.*]] = fptosi double [[ADD]] to i32 895 // CHECK2-NEXT: store i32 [[CONV]], ptr [[TMP0]], align 4 896 // CHECK2-NEXT: ret void 897 // 898 // 899 // CHECK2-LABEL: define {{[^@]+}}@_Z3baziRd_omp_outlined_wrapper 900 // CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR8:[0-9]+]] { 901 // CHECK2-NEXT: entry: 902 // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, 
align 2 903 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 904 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 905 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4 906 // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 907 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 908 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 909 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) 910 // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4 911 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0 912 // CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 913 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 1 914 // CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 915 // CHECK2-NEXT: call void @_Z3baziRd_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR2:[0-9]+]] 916 // CHECK2-NEXT: ret void 917 // 918