// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2

// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2

// expected-no-diagnostics
#ifndef HEADER
#define HEADER

template<typename tx>
tx ftemplate(int n) {
  tx a = 0;
  short aa = 0;
  tx b[10];

#pragma omp target parallel map(tofrom: aa) num_threads(1024)
  {
    aa += 1;
  }

#pragma omp target parallel map(tofrom:a, aa, b) if(target: n>40) num_threads(n)
  {
    a += 1;
    aa += 1;
    b[2] += 1;
  }

  return a;
}

int bar(int n){
  int a = 0;

  a += ftemplate<int>(n);

  return a;
}

#endif
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25
// CHECK1-SAME: (i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[AA_ADDR:%.*]] = alloca i16*, align 8
// CHECK1-NEXT:    [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
// CHECK1-NEXT:    [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8
// CHECK1-NEXT:    store i16* [[AA]], i16** [[AA_ADDR]], align 8
// CHECK1-NEXT:    [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8
// CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
// CHECK1-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1:       user_code.entry:
// CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
// CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
// CHECK1-NEXT:    store i16* [[TMP0]], i16** [[TMP3]], align 8
// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast %struct.anon* [[DOTTMP_OUTLINED_AGG_ARG]] to i8*
// CHECK1-NEXT:    [[TMP5:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8)
// CHECK1-NEXT:    [[TMP6:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP4]], i8* [[TMP5]])
// CHECK1-NEXT:    [[TMP7:%.*]] = load [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], align 8
// CHECK1-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP6]] to %struct.anon*
// CHECK1-NEXT:    store [[STRUCT_ANON]] [[TMP7]], %struct.anon* [[TMP8]], align 8
// CHECK1-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon*)* @__omp_outlined__ to i8*), i8* null, i8* [[TMP6]])
// CHECK1-NEXT:    call void @__kmpc_free_shared(i8* [[TMP5]], i64 8)
// CHECK1-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
// CHECK1-NEXT:    ret void
// CHECK1:       worker.exit:
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__
// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8
// CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT:    store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8
// CHECK1-NEXT:    [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8
// CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 8
// CHECK1-NEXT:    [[TMP3:%.*]] = load i16, i16* [[TMP2]], align 2
// CHECK1-NEXT:    [[CONV:%.*]] = sext i16 [[TMP3]] to i32
// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// CHECK1-NEXT:    [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
// CHECK1-NEXT:    store i16 [[CONV1]], i16* [[TMP2]], align 2
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30
// CHECK1-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[AA_ADDR:%.*]] = alloca i16*, align 8
// CHECK1-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
// CHECK1-NEXT:    [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8
// CHECK1-NEXT:    [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8
// CHECK1-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 8
// CHECK1-NEXT:    store i16* [[AA]], i16** [[AA_ADDR]], align 8
// CHECK1-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
// CHECK1-NEXT:    store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8
// CHECK1-NEXT:    [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 8
// CHECK1-NEXT:    [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
// CHECK1-NEXT:    [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32*
// CHECK1-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true)
// CHECK1-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
// CHECK1-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1:       user_code.entry:
// CHECK1-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4
// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
// CHECK1-NEXT:    store i32* [[TMP0]], i32** [[TMP6]], align 8
// CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1
// CHECK1-NEXT:    store i16* [[TMP1]], i16** [[TMP7]], align 8
// CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2
// CHECK1-NEXT:    store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP8]], align 8
// CHECK1-NEXT:    [[TMP9:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8*
// CHECK1-NEXT:    [[TMP10:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 24)
// CHECK1-NEXT:    [[TMP11:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP9]], i8* [[TMP10]])
// CHECK1-NEXT:    [[TMP12:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 8
// CHECK1-NEXT:    [[TMP13:%.*]] = bitcast i8* [[TMP11]] to %struct.anon.0*
// CHECK1-NEXT:    store [[STRUCT_ANON_0]] [[TMP12]], %struct.anon.0* [[TMP13]], align 8
// CHECK1-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8* [[TMP11]])
// CHECK1-NEXT:    call void @__kmpc_free_shared(i8* [[TMP10]], i64 24)
// CHECK1-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
// CHECK1-NEXT:    ret void
// CHECK1:       worker.exit:
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1
// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8
// CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT:    store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8
// CHECK1-NEXT:    [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8
// CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8
// CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1
// CHECK1-NEXT:    [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8
// CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2
// CHECK1-NEXT:    [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8
// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4
// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP2]], align 4
// CHECK1-NEXT:    [[TMP8:%.*]] = load i16, i16* [[TMP4]], align 2
// CHECK1-NEXT:    [[CONV:%.*]] = sext i16 [[TMP8]] to i32
// CHECK1-NEXT:    [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// CHECK1-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
// CHECK1-NEXT:    store i16 [[CONV2]], i16* [[TMP4]], align 2
// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 2
// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK1-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1
// CHECK1-NEXT:    store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4
// CHECK1-NEXT:    ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25
// CHECK2-SAME: (i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[AA_ADDR:%.*]] = alloca i16*, align 4
// CHECK2-NEXT:    [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4
// CHECK2-NEXT:    [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8
// CHECK2-NEXT:    store i16* [[AA]], i16** [[AA_ADDR]], align 4
// CHECK2-NEXT:    [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4
// CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
// CHECK2-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2:       user_code.entry:
// CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
// CHECK2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
// CHECK2-NEXT:    store i16* [[TMP0]], i16** [[TMP3]], align 4
// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast %struct.anon* [[DOTTMP_OUTLINED_AGG_ARG]] to i8*
// CHECK2-NEXT:    [[TMP5:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4)
// CHECK2-NEXT:    [[TMP6:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP4]], i8* [[TMP5]])
// CHECK2-NEXT:    [[TMP7:%.*]] = load [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], align 4
// CHECK2-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP6]] to %struct.anon*
// CHECK2-NEXT:    store [[STRUCT_ANON]] [[TMP7]], %struct.anon* [[TMP8]], align 4
// CHECK2-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon*)* @__omp_outlined__ to i8*), i8* null, i8* [[TMP6]])
// CHECK2-NEXT:    call void @__kmpc_free_shared(i8* [[TMP5]], i32 4)
// CHECK2-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
// CHECK2-NEXT:    ret void
// CHECK2:       worker.exit:
// CHECK2-NEXT:    ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__
// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT:    [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4
// CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK2-NEXT:    store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4
// CHECK2-NEXT:    [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4
// CHECK2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 4
// CHECK2-NEXT:    [[TMP3:%.*]] = load i16, i16* [[TMP2]], align 2
// CHECK2-NEXT:    [[CONV:%.*]] = sext i16 [[TMP3]] to i32
// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// CHECK2-NEXT:    [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
// CHECK2-NEXT:    store i16 [[CONV1]], i16* [[TMP2]], align 2
// CHECK2-NEXT:    ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30
// CHECK2-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT:    [[AA_ADDR:%.*]] = alloca i16*, align 4
// CHECK2-NEXT:    [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
// CHECK2-NEXT:    [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4
// CHECK2-NEXT:    [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8
// CHECK2-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 4
// CHECK2-NEXT:    store i16* [[AA]], i16** [[AA_ADDR]], align 4
// CHECK2-NEXT:    store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
// CHECK2-NEXT:    store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4
// CHECK2-NEXT:    [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 4
// CHECK2-NEXT:    [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
// CHECK2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true)
// CHECK2-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
// CHECK2-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2:       user_code.entry:
// CHECK2-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
// CHECK2-NEXT:    store i32* [[TMP0]], i32** [[TMP6]], align 4
// CHECK2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1
// CHECK2-NEXT:    store i16* [[TMP1]], i16** [[TMP7]], align 4
// CHECK2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2
// CHECK2-NEXT:    store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP8]], align 4
// CHECK2-NEXT:    [[TMP9:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8*
// CHECK2-NEXT:    [[TMP10:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 12)
// CHECK2-NEXT:    [[TMP11:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP9]], i8* [[TMP10]])
// CHECK2-NEXT:    [[TMP12:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 4
// CHECK2-NEXT:    [[TMP13:%.*]] = bitcast i8* [[TMP11]] to %struct.anon.0*
// CHECK2-NEXT:    store [[STRUCT_ANON_0]] [[TMP12]], %struct.anon.0* [[TMP13]], align 4
// CHECK2-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8* [[TMP11]])
// CHECK2-NEXT:    call void @__kmpc_free_shared(i8* [[TMP10]], i32 12)
// CHECK2-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
// CHECK2-NEXT:    ret void
// CHECK2:       worker.exit:
// CHECK2-NEXT:    ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1
// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK2-NEXT:    [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4
// CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK2-NEXT:    store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4
// CHECK2-NEXT:    [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4
// CHECK2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4
// CHECK2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1
// CHECK2-NEXT:    [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 4
// CHECK2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2
// CHECK2-NEXT:    [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4
// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4
// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP2]], align 4
// CHECK2-NEXT:    [[TMP8:%.*]] = load i16, i16* [[TMP4]], align 2
// CHECK2-NEXT:    [[CONV:%.*]] = sext i16 [[TMP8]] to i32
// CHECK2-NEXT:    [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// CHECK2-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
// CHECK2-NEXT:    store i16 [[CONV2]], i16* [[TMP4]], align 2
// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 2
// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
// CHECK2-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1
// CHECK2-NEXT:    store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4
// CHECK2-NEXT:    ret void
//