1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ 2 // Test target codegen - host bc file has to be created first. 3 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc 4 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1 5 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc 6 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2 7 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK3 8 9 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc 10 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK4 11 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc 12 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK5 13 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK6 14 15 // expected-no-diagnostics 16 #ifndef HEADER 17 #define HEADER 18 19 template<typename tx> 20 tx ftemplate(int n) { 21 tx a = 0; 22 short aa = 0; 23 tx b[10]; 24 25 #pragma omp target parallel map(tofrom: aa) num_threads(1024) 26 { 27 aa += 1; 28 } 29 30 #pragma omp target parallel map(tofrom:a, aa, b) if(target: n>40) num_threads(n) 31 { 32 a += 1; 33 aa += 1; 34 b[2] += 1; 35 } 36 37 return a; 38 } 39 40 int bar(int n){ 41 int a = 0; 42 43 a += ftemplate<int>(n); 44 45 return a; 46 } 47 48 #endif 49 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25 50 // CHECK1-SAME: (i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { 51 // CHECK1-NEXT: entry: 52 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 53 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 54 // CHECK1-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 55 // CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 56 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) 57 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 58 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 59 // CHECK1: user_code.entry: 60 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) 61 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 62 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP0]] to i8* 63 // CHECK1-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 64 // CHECK1-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 65 // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i64 1) 66 // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) 67 // CHECK1-NEXT: ret void 68 // CHECK1: worker.exit: 69 // CHECK1-NEXT: ret void 70 // 71 // 72 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ 73 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { 74 // CHECK1-NEXT: entry: 75 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 76 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 77 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 78 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 79 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 80 // CHECK1-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 81 // CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 82 // CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2 83 // CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 84 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 85 // CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 86 // CHECK1-NEXT: store i16 [[CONV1]], i16* [[TMP0]], align 2 87 // CHECK1-NEXT: ret void 88 // 89 // 90 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30 91 // CHECK1-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { 92 // CHECK1-NEXT: entry: 93 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 94 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 95 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 96 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 97 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 98 // CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 99 // CHECK1-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 100 // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 101 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 102 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 103 // CHECK1-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 104 // CHECK1-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 105 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* 106 // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) 107 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 108 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 109 // CHECK1: user_code.entry: 110 // CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) 111 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 112 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 113 // CHECK1-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP0]] to i8* 114 // CHECK1-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 115 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 116 // CHECK1-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP1]] to i8* 117 // CHECK1-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 8 118 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 119 // CHECK1-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* 120 // CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 121 // CHECK1-NEXT: [[TMP12:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 122 // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP12]], i64 3) 123 // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) 124 // CHECK1-NEXT: ret void 125 // CHECK1: worker.exit: 126 // CHECK1-NEXT: ret void 127 // 128 // 129 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 130 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { 131 // CHECK1-NEXT: entry: 132 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 133 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 134 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 135 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 136 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 137 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 138 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 139 // CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 140 // CHECK1-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 141 // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 142 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 143 // CHECK1-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 144 // CHECK1-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 145 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 146 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 147 // CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 148 // CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP1]], align 2 149 // CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 150 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 151 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 152 // CHECK1-NEXT: store i16 [[CONV2]], i16* [[TMP1]], align 2 153 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i64 0, i64 2 154 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 155 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 156 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 157 // CHECK1-NEXT: ret void 158 // 159 // 160 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25 161 // CHECK2-SAME: (i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { 162 // CHECK2-NEXT: entry: 163 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 164 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 165 // CHECK2-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 166 // CHECK2-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 167 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) 168 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 169 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 170 // CHECK2: user_code.entry: 171 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) 172 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 173 // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP0]] to i8* 174 // CHECK2-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 175 // CHECK2-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 176 // CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i32 1) 177 // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) 178 // CHECK2-NEXT: ret void 179 // CHECK2: worker.exit: 180 // CHECK2-NEXT: ret void 181 // 182 // 183 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ 184 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { 185 // CHECK2-NEXT: entry: 186 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 187 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 188 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 189 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 190 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 191 // CHECK2-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 192 // CHECK2-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 193 // CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2 194 // CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 195 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 196 // CHECK2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 197 // CHECK2-NEXT: store i16 [[CONV1]], i16* [[TMP0]], align 2 198 // CHECK2-NEXT: ret void 199 // 200 // 201 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30 202 // CHECK2-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { 203 // CHECK2-NEXT: entry: 204 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 205 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 206 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 207 // CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 208 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 209 // CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 210 // CHECK2-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 211 // CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 212 // CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 213 // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 214 // CHECK2-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 215 // CHECK2-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 216 // CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) 217 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 218 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 219 // CHECK2: user_code.entry: 220 // CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) 221 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 222 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 223 // CHECK2-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP0]] to i8* 224 // CHECK2-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 225 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 226 // CHECK2-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP1]] to i8* 227 // CHECK2-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 228 // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 229 // CHECK2-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* 230 // CHECK2-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 231 // CHECK2-NEXT: [[TMP12:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 232 // CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP12]], i32 3) 233 // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) 234 // CHECK2-NEXT: ret void 235 // CHECK2: worker.exit: 236 // CHECK2-NEXT: ret void 237 // 238 // 239 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 240 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { 241 // CHECK2-NEXT: entry: 242 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 243 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 244 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 245 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 246 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 247 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 248 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 249 // CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 250 // CHECK2-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 251 // CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 252 // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 253 // CHECK2-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 254 // CHECK2-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 255 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 256 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 257 // CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 258 // CHECK2-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP1]], align 2 259 // CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 260 // CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 261 // CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 262 // CHECK2-NEXT: store i16 [[CONV2]], i16* [[TMP1]], align 2 263 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i32 0, i32 2 264 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 265 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 266 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 267 // CHECK2-NEXT: ret void 268 // 269 // 270 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25 271 // CHECK3-SAME: (i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { 272 // CHECK3-NEXT: entry: 273 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 274 // CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 275 // CHECK3-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 276 // CHECK3-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 277 // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) 278 // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 279 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 280 // CHECK3: user_code.entry: 281 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) 282 // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 283 // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP0]] to i8* 284 // CHECK3-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 285 // CHECK3-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 286 // CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i32 1) 287 // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) 288 // CHECK3-NEXT: ret void 289 // CHECK3: worker.exit: 290 // CHECK3-NEXT: ret void 291 // 292 // 293 // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ 294 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { 295 // CHECK3-NEXT: entry: 296 // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 297 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 298 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 299 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 300 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 301 // CHECK3-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 302 // CHECK3-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 303 // CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2 304 // CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 305 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 306 // CHECK3-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 307 // CHECK3-NEXT: store i16 [[CONV1]], i16* [[TMP0]], align 2 308 // CHECK3-NEXT: ret void 309 // 310 // 311 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30 312 // CHECK3-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { 313 // CHECK3-NEXT: entry: 314 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 315 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 316 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 317 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 318 // CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 319 // CHECK3-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 320 // CHECK3-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 321 // CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 322 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 323 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 324 // CHECK3-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 325 // CHECK3-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 326 // CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) 327 // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 328 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 329 // CHECK3: user_code.entry: 330 // CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) 331 // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 332 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 333 // CHECK3-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP0]] to i8* 334 // CHECK3-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 335 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 336 // CHECK3-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP1]] to i8* 337 // CHECK3-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 338 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 339 // CHECK3-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* 340 // CHECK3-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 341 // CHECK3-NEXT: [[TMP12:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 342 // CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP12]], i32 3) 343 // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) 344 // CHECK3-NEXT: ret void 345 // CHECK3: worker.exit: 346 // CHECK3-NEXT: ret void 347 // 348 // 349 // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 350 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { 351 // CHECK3-NEXT: entry: 352 // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 353 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 354 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 355 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 356 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 357 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 358 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 359 // CHECK3-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 360 // CHECK3-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 361 // CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 362 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 363 // CHECK3-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 364 // CHECK3-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 365 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 366 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 367 // CHECK3-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 368 // CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP1]], align 2 369 // CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 370 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 371 // CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 372 // CHECK3-NEXT: store i16 [[CONV2]], i16* [[TMP1]], align 2 373 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i32 0, i32 2 374 // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 375 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 376 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 377 // CHECK3-NEXT: ret void 378 // 379 // 380 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25 381 // CHECK4-SAME: (i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { 382 // CHECK4-NEXT: entry: 383 // CHECK4-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 384 // CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 385 // CHECK4-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 386 // CHECK4-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 387 // CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) 388 // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 389 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 390 // CHECK4: user_code.entry: 391 // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) 392 // CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 393 // CHECK4-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP0]] to i8* 394 // CHECK4-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 395 // CHECK4-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 396 // CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i64 1) 397 // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) 398 // CHECK4-NEXT: ret void 399 // CHECK4: worker.exit: 400 // CHECK4-NEXT: ret void 401 // 402 // 403 // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__ 404 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { 405 // CHECK4-NEXT: entry: 406 // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 407 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 408 // CHECK4-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 409 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 410 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 411 // CHECK4-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 412 // CHECK4-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 413 // CHECK4-NEXT: [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2 414 // CHECK4-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 415 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 416 // CHECK4-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 417 // CHECK4-NEXT: store i16 [[CONV1]], i16* [[TMP0]], align 2 418 // CHECK4-NEXT: ret void 419 // 420 // 421 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30 422 // CHECK4-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { 423 // CHECK4-NEXT: entry: 424 // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 425 // CHECK4-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 426 // CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 427 // CHECK4-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 428 // CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 429 // CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 430 // CHECK4-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 431 // CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 432 // CHECK4-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 433 // CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 434 // CHECK4-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 435 // CHECK4-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 436 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* 437 // CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) 438 // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 439 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 440 // CHECK4: user_code.entry: 441 // CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) 442 // CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 443 // CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 444 // CHECK4-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP0]] to i8* 445 // CHECK4-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 446 // CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 447 // CHECK4-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP1]] to i8* 448 // CHECK4-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 8 449 // CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 450 // CHECK4-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* 451 // CHECK4-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 452 // CHECK4-NEXT: [[TMP12:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 453 // CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP12]], i64 3) 454 // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) 455 // CHECK4-NEXT: ret void 456 // CHECK4: worker.exit: 457 // CHECK4-NEXT: ret void 458 // 459 // 460 // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1 461 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { 462 // CHECK4-NEXT: entry: 463 // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 464 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 465 // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 466 // CHECK4-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 467 // CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 468 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 469 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 470 // CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 471 // CHECK4-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 472 // CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 473 // CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 474 // CHECK4-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 475 // CHECK4-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 476 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 477 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 478 // CHECK4-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 479 // CHECK4-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP1]], align 2 480 // CHECK4-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 481 // CHECK4-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 482 // CHECK4-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 483 // CHECK4-NEXT: store i16 [[CONV2]], i16* [[TMP1]], align 2 484 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i64 0, i64 2 485 // CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 486 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 487 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 488 // CHECK4-NEXT: ret void 489 // 490 // 491 // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25 492 // CHECK5-SAME: (i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { 493 // CHECK5-NEXT: entry: 494 // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 495 // CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 496 // CHECK5-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 497 // CHECK5-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 498 // CHECK5-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) 499 // CHECK5-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 500 // CHECK5-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 501 // CHECK5: user_code.entry: 502 // CHECK5-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) 503 // CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 504 // CHECK5-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP0]] to i8* 505 // CHECK5-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 506 // CHECK5-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 507 // CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i32 1) 508 // CHECK5-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) 509 // CHECK5-NEXT: ret void 510 // CHECK5: worker.exit: 511 // CHECK5-NEXT: ret void 512 // 513 // 514 // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__ 515 // CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { 516 // CHECK5-NEXT: entry: 517 // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 518 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 519 // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 520 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 521 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 522 // CHECK5-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 523 // CHECK5-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 524 // CHECK5-NEXT: [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2 525 // CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 526 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 527 // CHECK5-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 528 // CHECK5-NEXT: store i16 [[CONV1]], i16* [[TMP0]], align 2 529 // CHECK5-NEXT: ret void 530 // 531 // 532 // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30 533 // CHECK5-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { 534 // CHECK5-NEXT: entry: 535 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 536 // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 537 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 538 // CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 539 // CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 540 // CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 541 // CHECK5-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 542 // CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 543 // CHECK5-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 544 // CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 545 // CHECK5-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 546 // CHECK5-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 547 // CHECK5-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) 548 // CHECK5-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 549 // CHECK5-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 550 // CHECK5: user_code.entry: 551 // CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) 552 // CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 553 // CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 554 // CHECK5-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP0]] to i8* 555 // CHECK5-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 556 // CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 557 // CHECK5-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP1]] to i8* 558 // CHECK5-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 559 // CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 560 // CHECK5-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* 561 // CHECK5-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 562 // CHECK5-NEXT: [[TMP12:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 563 // CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP12]], i32 3) 564 // CHECK5-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) 565 // CHECK5-NEXT: ret void 566 // CHECK5: worker.exit: 567 // CHECK5-NEXT: ret void 568 // 569 // 570 // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__1 571 // CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { 572 // CHECK5-NEXT: entry: 573 // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 574 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 575 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 576 // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 577 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 578 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 579 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 580 // CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 581 // CHECK5-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 582 // CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 583 // CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 584 // CHECK5-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 585 // CHECK5-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 586 // CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 587 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 588 // CHECK5-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 589 // CHECK5-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP1]], align 2 590 // CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 591 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 592 // CHECK5-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 593 // CHECK5-NEXT: store i16 [[CONV2]], i16* [[TMP1]], align 2 594 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i32 0, i32 2 595 // CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 596 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 597 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 598 // CHECK5-NEXT: ret void 599 // 600 // 601 // CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25 602 // CHECK6-SAME: (i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { 603 // CHECK6-NEXT: entry: 604 // CHECK6-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 605 // CHECK6-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 606 // CHECK6-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 607 // CHECK6-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 608 // CHECK6-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) 609 // CHECK6-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 610 // CHECK6-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 611 // CHECK6: user_code.entry: 612 // CHECK6-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) 613 // CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 614 // CHECK6-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP0]] to i8* 615 // CHECK6-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 616 // CHECK6-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 617 // CHECK6-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i32 1) 618 // CHECK6-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) 619 // CHECK6-NEXT: ret void 620 // CHECK6: worker.exit: 621 // CHECK6-NEXT: ret void 622 // 623 // 624 // CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__ 625 // CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { 626 // CHECK6-NEXT: entry: 627 // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 628 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 629 // CHECK6-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 630 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 631 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 632 // CHECK6-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 633 // CHECK6-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 634 // CHECK6-NEXT: [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2 635 // CHECK6-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 636 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 637 // CHECK6-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 638 // CHECK6-NEXT: store i16 [[CONV1]], i16* [[TMP0]], align 2 639 // CHECK6-NEXT: ret void 640 // 641 // 642 // CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30 643 // CHECK6-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { 644 // CHECK6-NEXT: entry: 645 // CHECK6-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 646 // CHECK6-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 647 // CHECK6-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 648 // CHECK6-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 649 // CHECK6-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 650 // CHECK6-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 651 // CHECK6-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 652 // CHECK6-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 653 // CHECK6-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 654 // CHECK6-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 655 // CHECK6-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 656 // CHECK6-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 657 // CHECK6-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) 658 // CHECK6-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 659 // CHECK6-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 660 // CHECK6: user_code.entry: 661 // CHECK6-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) 662 // CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 663 // CHECK6-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 664 // CHECK6-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP0]] to i8* 665 // CHECK6-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 666 // CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 667 // CHECK6-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP1]] to i8* 668 // CHECK6-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 669 // CHECK6-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 670 // CHECK6-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* 671 // CHECK6-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 672 // CHECK6-NEXT: [[TMP12:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 673 // CHECK6-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP12]], i32 3) 674 // CHECK6-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) 675 // CHECK6-NEXT: ret void 676 // CHECK6: worker.exit: 677 // CHECK6-NEXT: ret void 678 // 679 // 680 // CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__1 681 // CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { 682 // CHECK6-NEXT: entry: 683 // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 684 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 685 // CHECK6-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 686 // CHECK6-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 687 // CHECK6-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 688 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 689 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 690 // CHECK6-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 691 // CHECK6-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 692 // CHECK6-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 693 // CHECK6-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 694 // CHECK6-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 695 // CHECK6-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 696 // CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 697 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 698 // CHECK6-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 699 // CHECK6-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP1]], align 2 700 // CHECK6-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 701 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 702 // CHECK6-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 703 // CHECK6-NEXT: store i16 [[CONV2]], i16* [[TMP1]], align 2 704 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i32 0, i32 2 705 // CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 706 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 707 // CHECK6-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 708 // CHECK6-NEXT: ret void 709 // 710