// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2

// expected-no-diagnostics
#ifndef HEADER
#define HEADER

void work(int *C) {
#pragma omp atomic
  ++(*C);
}

void use(int *C) {
#pragma omp parallel num_threads(2)
  work(C);
}

int main() {
  int C = 0;
#pragma omp target map(C)
  {
    use(&C);
#pragma omp parallel num_threads(2)
    use(&C);
  }

  return C;
}

#endif
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25
// CHECK1-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 8
// CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 8
// CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK1-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1:       user_code.entry:
// CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT:    call void @_Z3usePi(i32* noundef [[TMP0]]) #[[ATTR7:[0-9]+]]
// CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8*
// CHECK1-NEXT:    store i8* [[TMP4]], i8** [[TMP3]], align 8
// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK1-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP5]], i64 1)
// CHECK1-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT:    ret void
// CHECK1:       worker.exit:
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_Z3usePi
// CHECK1-SAME: (i32* noundef [[C:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
// CHECK1-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 8
// CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i32** [[C_ADDR]] to i8*
// CHECK1-NEXT:    store i8* [[TMP2]], i8** [[TMP1]], align 8
// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK1-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 2, i32 -1, i8* bitcast (void (i32*, i32*, i32**)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP3]], i64 1)
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__
// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 8
// CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 8
// CHECK1-NEXT:    call void @_Z3usePi(i32* noundef [[TMP0]]) #[[ATTR7]]
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___wrapper
// CHECK1-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
// CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
// CHECK1-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK1-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
// CHECK1-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK1-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
// CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
// CHECK1-NEXT:    call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4:[0-9]+]]
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1
// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[C_ADDR:%.*]] = alloca i32**, align 8
// CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT:    store i32** [[C]], i32*** [[C_ADDR]], align 8
// CHECK1-NEXT:    [[TMP0:%.*]] = load i32**, i32*** [[C_ADDR]], align 8
// CHECK1-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[TMP0]], align 8
// CHECK1-NEXT:    call void @_Z4workPi(i32* noundef [[TMP1]]) #[[ATTR7]]
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_Z4workPi
// CHECK1-SAME: (i32* noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 8
// CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 8
// CHECK1-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8*
// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8*
// CHECK1-NEXT:    call void @__atomic_load(i64 noundef 4, i8* noundef [[TMP1]], i8* noundef [[TMP2]], i32 noundef 0) #[[ATTR7]]
// CHECK1-NEXT:    br label [[ATOMIC_CONT:%.*]]
// CHECK1:       atomic_cont:
// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ATOMIC_TEMP]], align 4
// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK1-NEXT:    store i32 [[ADD]], i32* [[ATOMIC_TEMP1]], align 4
// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8*
// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8*
// CHECK1-NEXT:    [[TMP6:%.*]] = bitcast i32* [[ATOMIC_TEMP1]] to i8*
// CHECK1-NEXT:    [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 4, i8* noundef [[TMP4]], i8* noundef [[TMP5]], i8* noundef [[TMP6]], i32 noundef 0, i32 noundef 0) #[[ATTR7]]
// CHECK1-NEXT:    br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
// CHECK1:       atomic_exit:
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
// CHECK1-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
// CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
// CHECK1-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK1-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
// CHECK1-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK1-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
// CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32***
// CHECK1-NEXT:    [[TMP5:%.*]] = load i32**, i32*** [[TMP4]], align 8
// CHECK1-NEXT:    call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32** [[TMP5]]) #[[ATTR4]]
// CHECK1-NEXT:    ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25
// CHECK2-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4
// CHECK2-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 4
// CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4
// CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK2-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2:       user_code.entry:
// CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT:    call void @_Z3usePi(i32* noundef [[TMP0]]) #[[ATTR7:[0-9]+]]
// CHECK2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8*
// CHECK2-NEXT:    store i8* [[TMP4]], i8** [[TMP3]], align 4
// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK2-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP5]], i32 1)
// CHECK2-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT:    ret void
// CHECK2:       worker.exit:
// CHECK2-NEXT:    ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_Z3usePi
// CHECK2-SAME: (i32* noundef [[C:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4
// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
// CHECK2-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 4
// CHECK2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i32** [[C_ADDR]] to i8*
// CHECK2-NEXT:    store i8* [[TMP2]], i8** [[TMP1]], align 4
// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK2-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 2, i32 -1, i8* bitcast (void (i32*, i32*, i32**)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP3]], i32 1)
// CHECK2-NEXT:    ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__
// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK2-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 4
// CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4
// CHECK2-NEXT:    call void @_Z3usePi(i32* noundef [[TMP0]]) #[[ATTR7]]
// CHECK2-NEXT:    ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined___wrapper
// CHECK2-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
// CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
// CHECK2-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK2-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
// CHECK2-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK2-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4
// CHECK2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0
// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4
// CHECK2-NEXT:    call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4:[0-9]+]]
// CHECK2-NEXT:    ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1
// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT:    [[C_ADDR:%.*]] = alloca i32**, align 4
// CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK2-NEXT:    store i32** [[C]], i32*** [[C_ADDR]], align 4
// CHECK2-NEXT:    [[TMP0:%.*]] = load i32**, i32*** [[C_ADDR]], align 4
// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[TMP0]], align 4
// CHECK2-NEXT:    call void @_Z4workPi(i32* noundef [[TMP1]]) #[[ATTR7]]
// CHECK2-NEXT:    ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_Z4workPi
// CHECK2-SAME: (i32* noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 4
// CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4
// CHECK2-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8*
// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8*
// CHECK2-NEXT:    call void @__atomic_load(i32 noundef 4, i8* noundef [[TMP1]], i8* noundef [[TMP2]], i32 noundef 0) #[[ATTR7]]
// CHECK2-NEXT:    br label [[ATOMIC_CONT:%.*]]
// CHECK2:       atomic_cont:
// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ATOMIC_TEMP]], align 4
// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK2-NEXT:    store i32 [[ADD]], i32* [[ATOMIC_TEMP1]], align 4
// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8*
// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8*
// CHECK2-NEXT:    [[TMP6:%.*]] = bitcast i32* [[ATOMIC_TEMP1]] to i8*
// CHECK2-NEXT:    [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i32 noundef 4, i8* noundef [[TMP4]], i8* noundef [[TMP5]], i8* noundef [[TMP6]], i32 noundef 0, i32 noundef 0) #[[ATTR7]]
// CHECK2-NEXT:    br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
// CHECK2:       atomic_exit:
// CHECK2-NEXT:    ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
// CHECK2-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
// CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4
// CHECK2-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
// CHECK2-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
// CHECK2-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK2-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4
// CHECK2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0
// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32***
// CHECK2-NEXT:    [[TMP5:%.*]] = load i32**, i32*** [[TMP4]], align 4
// CHECK2-NEXT:    call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32** [[TMP5]]) #[[ATTR4]]
// CHECK2-NEXT:    ret void
//