1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ 2 // REQUIRES: amdgpu-registered-target 3 4 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc 5 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=IR-GPU 6 7 // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR 8 9 // Check same results after serialization round-trip 10 // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s 11 // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH 12 13 // expected-no-diagnostics 14 15 #ifndef HEADER 16 #define HEADER 17 int foo() { 18 int i; 19 int j; 20 int sum[10][10]; 21 22 #pragma omp target teams loop reduction(+:sum) collapse(2) \ 23 bind(parallel) order(concurrent) lastprivate(j) map(tofrom:sum) 24 for(i=0; i<10; i++) 25 for(j=0; j<10; j++) 26 sum[i][j] += i; 27 28 return 0; 29 } 30 #endif // NOTE(review): this test verifies host (IR, IR-PCH) and AMDGPU device (IR-GPU) codegen for '#pragma omp target teams loop' with reduction(+:sum) on a 2-D array, collapse(2), lastprivate(j), and map(tofrom:sum). The CHECK lines below are machine-generated; do NOT edit them by hand — rerun utils/update_cc_test_checks.py after any change to foo() or the RUN lines. The offload entry names encode the pragma's source line (_l22/_l23), so no lines may be inserted above the pragma without regenerating. 31 // IR-PCH-HOST-LABEL: define {{[^@]+}}@_Z3foov 32 // IR-PCH-HOST-SAME: () #[[ATTR0:[0-9]+]] { 33 // IR-PCH-HOST-NEXT: entry: 34 // IR-PCH-HOST-NEXT: [[I:%.*]] = alloca i32, align 4 35 // IR-PCH-HOST-NEXT: [[J:%.*]] = alloca i32, align 4 36 // IR-PCH-HOST-NEXT: [[SUM:%.*]] = alloca [10 x [10 x i32]], align 16 37 // IR-PCH-HOST-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 38 // IR-PCH-HOST-NEXT: [[TMP0:%.*]] = load i32, ptr [[J]], align 4 39 // IR-PCH-HOST-NEXT: store i32 [[TMP0]], ptr [[J_CASTED]], align 4 40 // IR-PCH-HOST-NEXT: 
[[TMP1:%.*]] = load i64, ptr [[J_CASTED]], align 8 41 // IR-PCH-HOST-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22(i64 [[TMP1]], ptr [[SUM]]) #[[ATTR2:[0-9]+]] 42 // IR-PCH-HOST-NEXT: ret i32 0 43 // IR-PCH-HOST-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22 44 // IR-PCH-HOST-SAME: (i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] { 45 // IR-PCH-HOST-NEXT: entry: 46 // IR-PCH-HOST-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 47 // IR-PCH-HOST-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 48 // IR-PCH-HOST-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 49 // IR-PCH-HOST-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 50 // IR-PCH-HOST-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 51 // IR-PCH-HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 52 // IR-PCH-HOST-NEXT: [[TMP1:%.*]] = load i32, ptr [[J_ADDR]], align 4 53 // IR-PCH-HOST-NEXT: store i32 [[TMP1]], ptr [[J_CASTED]], align 4 54 // IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load i64, ptr [[J_CASTED]], align 8 55 // IR-PCH-HOST-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 2, ptr @.omp_outlined., i64 [[TMP2]], ptr [[TMP0]]) 56 // IR-PCH-HOST-NEXT: ret void 57 // IR-PCH-HOST-LABEL: define {{[^@]+}}@.omp_outlined. 
58 // IR-PCH-HOST-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { 59 // IR-PCH-HOST-NEXT: entry: 60 // IR-PCH-HOST-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 61 // IR-PCH-HOST-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 62 // IR-PCH-HOST-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 63 // IR-PCH-HOST-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 64 // IR-PCH-HOST-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 16 65 // IR-PCH-HOST-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 66 // IR-PCH-HOST-NEXT: [[TMP:%.*]] = alloca i32, align 4 67 // IR-PCH-HOST-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 68 // IR-PCH-HOST-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 69 // IR-PCH-HOST-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 70 // IR-PCH-HOST-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 71 // IR-PCH-HOST-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 72 // IR-PCH-HOST-NEXT: [[J3:%.*]] = alloca i32, align 4 73 // IR-PCH-HOST-NEXT: [[I:%.*]] = alloca i32, align 4 74 // IR-PCH-HOST-NEXT: [[J4:%.*]] = alloca i32, align 4 75 // IR-PCH-HOST-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 76 // IR-PCH-HOST-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 77 // IR-PCH-HOST-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 78 // IR-PCH-HOST-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 79 // IR-PCH-HOST-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 80 // IR-PCH-HOST-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 81 // IR-PCH-HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 82 // IR-PCH-HOST-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1]], i32 0, i32 0, i32 0 83 // IR-PCH-HOST-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 84 // IR-PCH-HOST-NEXT: 
[[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] 85 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] 86 // IR-PCH-HOST: omp.arrayinit.body: 87 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] 88 // IR-PCH-HOST-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 89 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 90 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]] 91 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] 92 // IR-PCH-HOST: omp.arrayinit.done: 93 // IR-PCH-HOST-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 94 // IR-PCH-HOST-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 95 // IR-PCH-HOST-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 96 // IR-PCH-HOST-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 97 // IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 98 // IR-PCH-HOST-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 99 // IR-PCH-HOST-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 100 // IR-PCH-HOST-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 101 // IR-PCH-HOST-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 102 // IR-PCH-HOST-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 103 // IR-PCH-HOST: cond.true: 104 // IR-PCH-HOST-NEXT: br label [[COND_END:%.*]] 105 // IR-PCH-HOST: cond.false: 106 // IR-PCH-HOST-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 107 // IR-PCH-HOST-NEXT: br label [[COND_END]] 108 // 
IR-PCH-HOST: cond.end: 109 // IR-PCH-HOST-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] 110 // IR-PCH-HOST-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 111 // IR-PCH-HOST-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 112 // IR-PCH-HOST-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 113 // IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 114 // IR-PCH-HOST: omp.inner.for.cond: 115 // IR-PCH-HOST-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 116 // IR-PCH-HOST-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 117 // IR-PCH-HOST-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] 118 // IR-PCH-HOST-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 119 // IR-PCH-HOST: omp.inner.for.body: 120 // IR-PCH-HOST-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 121 // IR-PCH-HOST-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 122 // IR-PCH-HOST-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 123 // IR-PCH-HOST-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 124 // IR-PCH-HOST-NEXT: [[TMP13:%.*]] = load i32, ptr [[J3]], align 4 125 // IR-PCH-HOST-NEXT: store i32 [[TMP13]], ptr [[J_CASTED]], align 4 126 // IR-PCH-HOST-NEXT: [[TMP14:%.*]] = load i64, ptr [[J_CASTED]], align 8 127 // IR-PCH-HOST-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @.omp_outlined..1, i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], ptr [[SUM1]]) 128 // IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 129 // IR-PCH-HOST: omp.inner.for.inc: 130 // IR-PCH-HOST-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 131 // IR-PCH-HOST-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 132 // IR-PCH-HOST-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] 133 // IR-PCH-HOST-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 134 // IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_COND]] 135 // IR-PCH-HOST: omp.inner.for.end: 136 // IR-PCH-HOST-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 137 // IR-PCH-HOST: omp.loop.exit: 138 // IR-PCH-HOST-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 139 // IR-PCH-HOST-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 140 // IR-PCH-HOST-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]]) 141 // IR-PCH-HOST-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 142 // IR-PCH-HOST-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 143 // IR-PCH-HOST-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 144 // IR-PCH-HOST: .omp.lastprivate.then: 145 // IR-PCH-HOST-NEXT: store i32 10, ptr [[J3]], align 4 146 // IR-PCH-HOST-NEXT: [[TMP21:%.*]] = load i32, ptr [[J3]], align 4 147 // IR-PCH-HOST-NEXT: store i32 [[TMP21]], ptr [[J_ADDR]], align 4 148 // IR-PCH-HOST-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 149 // IR-PCH-HOST: .omp.lastprivate.done: 150 // IR-PCH-HOST-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 151 // IR-PCH-HOST-NEXT: store ptr [[SUM1]], ptr [[TMP22]], align 8 152 // IR-PCH-HOST-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 153 // IR-PCH-HOST-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 154 // IR-PCH-HOST-NEXT: [[TMP25:%.*]] = call i32 
@__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) 155 // IR-PCH-HOST-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ 156 // IR-PCH-HOST-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] 157 // IR-PCH-HOST-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] 158 // IR-PCH-HOST-NEXT: ] 159 // IR-PCH-HOST: .omp.reduction.case1: 160 // IR-PCH-HOST-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 161 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP26]] 162 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 163 // IR-PCH-HOST: omp.arraycpy.body: 164 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 165 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] 166 // IR-PCH-HOST-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 167 // IR-PCH-HOST-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 168 // IR-PCH-HOST-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] 169 // IR-PCH-HOST-NEXT: store i32 [[ADD7]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 170 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1 171 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 172 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP26]] 173 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] 174 // IR-PCH-HOST: 
omp.arraycpy.done10: 175 // IR-PCH-HOST-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) 176 // IR-PCH-HOST-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] 177 // IR-PCH-HOST: .omp.reduction.case2: 178 // IR-PCH-HOST-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 179 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP0]], [[TMP29]] 180 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] 181 // IR-PCH-HOST: omp.arraycpy.body12: 182 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] 183 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY12]] ] 184 // IR-PCH-HOST-NEXT: [[TMP30:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], align 4 185 // IR-PCH-HOST-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 [[TMP30]] monotonic, align 4 186 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 187 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 188 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP29]] 189 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]] 190 // IR-PCH-HOST: omp.arraycpy.done18: 191 // IR-PCH-HOST-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) 192 // IR-PCH-HOST-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] 193 // IR-PCH-HOST: .omp.reduction.default: 194 // IR-PCH-HOST-NEXT: ret void 195 // IR-PCH-HOST-LABEL: define 
{{[^@]+}}@.omp_outlined..1 196 // IR-PCH-HOST-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { 197 // IR-PCH-HOST-NEXT: entry: 198 // IR-PCH-HOST-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 199 // IR-PCH-HOST-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 200 // IR-PCH-HOST-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 201 // IR-PCH-HOST-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 202 // IR-PCH-HOST-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 203 // IR-PCH-HOST-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 204 // IR-PCH-HOST-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 205 // IR-PCH-HOST-NEXT: [[TMP:%.*]] = alloca i32, align 4 206 // IR-PCH-HOST-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 207 // IR-PCH-HOST-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 208 // IR-PCH-HOST-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 209 // IR-PCH-HOST-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 210 // IR-PCH-HOST-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 211 // IR-PCH-HOST-NEXT: [[J3:%.*]] = alloca i32, align 4 212 // IR-PCH-HOST-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 16 213 // IR-PCH-HOST-NEXT: [[I:%.*]] = alloca i32, align 4 214 // IR-PCH-HOST-NEXT: [[J5:%.*]] = alloca i32, align 4 215 // IR-PCH-HOST-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 216 // IR-PCH-HOST-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 217 // IR-PCH-HOST-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 218 // IR-PCH-HOST-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 219 // IR-PCH-HOST-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 220 // IR-PCH-HOST-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 221 
// IR-PCH-HOST-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 222 // IR-PCH-HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 223 // IR-PCH-HOST-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 224 // IR-PCH-HOST-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 225 // IR-PCH-HOST-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 226 // IR-PCH-HOST-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 227 // IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 228 // IR-PCH-HOST-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 229 // IR-PCH-HOST-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 230 // IR-PCH-HOST-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 231 // IR-PCH-HOST-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 232 // IR-PCH-HOST-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 233 // IR-PCH-HOST-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i32 0, i32 0, i32 0 234 // IR-PCH-HOST-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 235 // IR-PCH-HOST-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] 236 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] 237 // IR-PCH-HOST: omp.arrayinit.body: 238 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] 239 // IR-PCH-HOST-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 240 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 241 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] 242 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] 243 // IR-PCH-HOST: omp.arrayinit.done: 244 // IR-PCH-HOST-NEXT: 
[[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 245 // IR-PCH-HOST-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 246 // IR-PCH-HOST-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 247 // IR-PCH-HOST-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 248 // IR-PCH-HOST-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 249 // IR-PCH-HOST-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 250 // IR-PCH-HOST: cond.true: 251 // IR-PCH-HOST-NEXT: br label [[COND_END:%.*]] 252 // IR-PCH-HOST: cond.false: 253 // IR-PCH-HOST-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 254 // IR-PCH-HOST-NEXT: br label [[COND_END]] 255 // IR-PCH-HOST: cond.end: 256 // IR-PCH-HOST-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] 257 // IR-PCH-HOST-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 258 // IR-PCH-HOST-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 259 // IR-PCH-HOST-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 260 // IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 261 // IR-PCH-HOST: omp.inner.for.cond: 262 // IR-PCH-HOST-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] 263 // IR-PCH-HOST-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] 264 // IR-PCH-HOST-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] 265 // IR-PCH-HOST-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 266 // IR-PCH-HOST: omp.inner.for.body: 267 // IR-PCH-HOST-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 268 // IR-PCH-HOST-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 10 269 // IR-PCH-HOST-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 270 // IR-PCH-HOST-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 
271 // IR-PCH-HOST-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] 272 // IR-PCH-HOST-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 273 // IR-PCH-HOST-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 274 // IR-PCH-HOST-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP13]], 10 275 // IR-PCH-HOST-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10 276 // IR-PCH-HOST-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], [[MUL8]] 277 // IR-PCH-HOST-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1 278 // IR-PCH-HOST-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] 279 // IR-PCH-HOST-NEXT: store i32 [[ADD10]], ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] 280 // IR-PCH-HOST-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] 281 // IR-PCH-HOST-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] 282 // IR-PCH-HOST-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 283 // IR-PCH-HOST-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i64 0, i64 [[IDXPROM]] 284 // IR-PCH-HOST-NEXT: [[TMP16:%.*]] = load i32, ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] 285 // IR-PCH-HOST-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64 286 // IR-PCH-HOST-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] 287 // IR-PCH-HOST-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] 288 // IR-PCH-HOST-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP14]] 289 // IR-PCH-HOST-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] 290 // IR-PCH-HOST-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 291 // IR-PCH-HOST: omp.body.continue: 292 // IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 293 // IR-PCH-HOST: omp.inner.for.inc: 294 // IR-PCH-HOST-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 295 // IR-PCH-HOST-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP18]], 1 296 // IR-PCH-HOST-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 297 // IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] 298 // IR-PCH-HOST: omp.inner.for.end: 299 // IR-PCH-HOST-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 300 // IR-PCH-HOST: omp.loop.exit: 301 // IR-PCH-HOST-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 302 // IR-PCH-HOST-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 303 // IR-PCH-HOST-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) 304 // IR-PCH-HOST-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 305 // IR-PCH-HOST-NEXT: store ptr [[SUM4]], ptr [[TMP21]], align 8 306 // IR-PCH-HOST-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 307 // IR-PCH-HOST-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 308 // IR-PCH-HOST-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) 309 // IR-PCH-HOST-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ 310 // IR-PCH-HOST-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] 311 // IR-PCH-HOST-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] 312 // IR-PCH-HOST-NEXT: ] 313 // IR-PCH-HOST: .omp.reduction.case1: 314 // IR-PCH-HOST-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 315 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP25]] 316 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 317 // IR-PCH-HOST: omp.arraycpy.body: 318 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4]], 
[[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 319 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ] 320 // IR-PCH-HOST-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 321 // IR-PCH-HOST-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 322 // IR-PCH-HOST-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] 323 // IR-PCH-HOST-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 324 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 325 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 326 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP25]] 327 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] 328 // IR-PCH-HOST: omp.arraycpy.done19: 329 // IR-PCH-HOST-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) 330 // IR-PCH-HOST-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] 331 // IR-PCH-HOST: .omp.reduction.case2: 332 // IR-PCH-HOST-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 333 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq ptr [[TMP0]], [[TMP28]] 334 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]] 335 // IR-PCH-HOST: omp.arraycpy.body21: 336 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY21]] ] 337 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[TMP0]], 
[[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY21]] ] 338 // IR-PCH-HOST-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], align 4 339 // IR-PCH-HOST-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 [[TMP29]] monotonic, align 4 340 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1 341 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1 342 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP28]] 343 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]] 344 // IR-PCH-HOST: omp.arraycpy.done27: 345 // IR-PCH-HOST-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) 346 // IR-PCH-HOST-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] 347 // IR-PCH-HOST: .omp.reduction.default: 348 // IR-PCH-HOST-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 349 // IR-PCH-HOST-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 350 // IR-PCH-HOST-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 351 // IR-PCH-HOST: .omp.lastprivate.then: 352 // IR-PCH-HOST-NEXT: store i32 10, ptr [[J3]], align 4 353 // IR-PCH-HOST-NEXT: [[TMP33:%.*]] = load i32, ptr [[J3]], align 4 354 // IR-PCH-HOST-NEXT: store i32 [[TMP33]], ptr [[J_ADDR]], align 4 355 // IR-PCH-HOST-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 356 // IR-PCH-HOST: .omp.lastprivate.done: 357 // IR-PCH-HOST-NEXT: ret void 358 // IR-PCH-HOST-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func 359 // IR-PCH-HOST-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { 360 // IR-PCH-HOST-NEXT: entry: 361 // IR-PCH-HOST-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 362 // 
IR-PCH-HOST-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 363 // IR-PCH-HOST-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 364 // IR-PCH-HOST-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 365 // IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 366 // IR-PCH-HOST-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 367 // IR-PCH-HOST-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 368 // IR-PCH-HOST-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 369 // IR-PCH-HOST-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 370 // IR-PCH-HOST-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 371 // IR-PCH-HOST-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 372 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] 373 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 374 // IR-PCH-HOST: omp.arraycpy.body: 375 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 376 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 377 // IR-PCH-HOST-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 378 // IR-PCH-HOST-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 379 // IR-PCH-HOST-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] 380 // IR-PCH-HOST-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 381 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 382 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 383 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] 384 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] 385 // IR-PCH-HOST: omp.arraycpy.done2: 386 // IR-PCH-HOST-NEXT: ret void 387 // IR-PCH-HOST-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.2 388 // IR-PCH-HOST-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { 389 // IR-PCH-HOST-NEXT: entry: 390 // IR-PCH-HOST-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 391 // IR-PCH-HOST-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 392 // IR-PCH-HOST-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 393 // IR-PCH-HOST-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 394 // IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 395 // IR-PCH-HOST-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 396 // IR-PCH-HOST-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 397 // IR-PCH-HOST-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 398 // IR-PCH-HOST-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 399 // IR-PCH-HOST-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 400 // IR-PCH-HOST-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 401 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] 402 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 403 // IR-PCH-HOST: omp.arraycpy.body: 404 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 405 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 406 // IR-PCH-HOST-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 407 // IR-PCH-HOST-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 408 // IR-PCH-HOST-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] 409 // IR-PCH-HOST-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 410 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 411 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 412 // IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] 413 // IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] 414 // IR-PCH-HOST: omp.arraycpy.done2: 415 // IR-PCH-HOST-NEXT: ret void 416 // CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l23 417 // CHECK-SAME: (i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR0:[0-9]+]] { 418 // CHECK-NEXT: entry: 419 // CHECK-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5) 420 // CHECK-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 421 // CHECK-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8, addrspace(5) 422 // CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5) 423 // CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5) 424 // CHECK-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr 425 // CHECK-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr 426 // CHECK-NEXT: [[J_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_CASTED]] to ptr 427 // CHECK-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr 428 // CHECK-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr 429 // CHECK-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8 430 // CHECK-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8 431 // 
CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8 432 // CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 2, i1 false) 433 // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 434 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 435 // CHECK: user_code.entry: 436 // CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) 437 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[J_ADDR_ASCAST]], align 4 438 // CHECK-NEXT: store i32 [[TMP3]], ptr [[J_CASTED_ASCAST]], align 4 439 // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[J_CASTED_ASCAST]], align 8 440 // CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4 441 // CHECK-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4 442 // CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]] 443 // CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2) 444 // CHECK-NEXT: ret void 445 // CHECK: worker.exit: 446 // CHECK-NEXT: ret void 447 // CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ 448 // CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] { 449 // CHECK-NEXT: entry: 450 // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 451 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 452 // CHECK-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5) 453 // CHECK-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 454 // CHECK-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) 455 // CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, 
align 4, addrspace(5) 456 // CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) 457 // CHECK-NEXT: [[_TMP2:%.*]] = alloca i32, align 4, addrspace(5) 458 // CHECK-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5) 459 // CHECK-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5) 460 // CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) 461 // CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) 462 // CHECK-NEXT: [[J3:%.*]] = alloca i32, align 4, addrspace(5) 463 // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) 464 // CHECK-NEXT: [[J4:%.*]] = alloca i32, align 4, addrspace(5) 465 // CHECK-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8, addrspace(5) 466 // CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5) 467 // CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) 468 // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr 469 // CHECK-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr 470 // CHECK-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr 471 // CHECK-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr 472 // CHECK-NEXT: [[SUM1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM1]] to ptr 473 // CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr 474 // CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr 475 // CHECK-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr 476 // CHECK-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr 477 // CHECK-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr 478 // CHECK-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[DOTOMP_STRIDE]] to ptr 479 // CHECK-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr 480 // CHECK-NEXT: [[J3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J3]] to ptr 481 // CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr 482 // CHECK-NEXT: [[J4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J4]] to ptr 483 // CHECK-NEXT: [[J_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_CASTED]] to ptr 484 // CHECK-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr 485 // CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr 486 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 487 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 488 // CHECK-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8 489 // CHECK-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8 490 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8 491 // CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1_ASCAST]], i32 0, i32 0, i32 0 492 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 493 // CHECK-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] 494 // CHECK-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] 495 // CHECK: omp.arrayinit.body: 496 // CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] 497 // CHECK-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 498 // CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 499 // CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq 
ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]] 500 // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] 501 // CHECK: omp.arrayinit.done: 502 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 503 // CHECK-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 504 // CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 505 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 506 // CHECK-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 507 // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 508 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 509 // CHECK-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]]) 510 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 511 // CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 512 // CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 513 // CHECK: cond.true: 514 // CHECK-NEXT: br label [[COND_END:%.*]] 515 // CHECK: cond.false: 516 // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 517 // CHECK-NEXT: br label [[COND_END]] 518 // CHECK: cond.end: 519 // CHECK-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] 520 // CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 521 // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 522 // CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4 523 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 524 // CHECK: omp.inner.for.cond: 525 // CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 526 // 
CHECK-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP7]], 100 527 // CHECK-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 528 // CHECK: omp.inner.for.body: 529 // CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 530 // CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 531 // CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 532 // CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 533 // CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[J3_ASCAST]], align 4 534 // CHECK-NEXT: store i32 [[TMP12]], ptr [[J_CASTED_ASCAST]], align 4 535 // CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[J_CASTED_ASCAST]], align 8 536 // CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 537 // CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP9]] to ptr 538 // CHECK-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8 539 // CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 540 // CHECK-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP11]] to ptr 541 // CHECK-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 542 // CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 543 // CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP13]] to ptr 544 // CHECK-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8 545 // CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3 546 // CHECK-NEXT: store ptr [[SUM1_ASCAST]], ptr [[TMP20]], align 8 547 // CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 548 // CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 549 // CHECK-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP22]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__.1, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 
4) 550 // CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 551 // CHECK: omp.inner.for.inc: 552 // CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 553 // CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 554 // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] 555 // CHECK-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4 556 // CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 557 // CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 558 // CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] 559 // CHECK-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 560 // CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 561 // CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 562 // CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] 563 // CHECK-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 564 // CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 565 // CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP29]], 99 566 // CHECK-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] 567 // CHECK: cond.true9: 568 // CHECK-NEXT: br label [[COND_END11:%.*]] 569 // CHECK: cond.false10: 570 // CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 571 // CHECK-NEXT: br label [[COND_END11]] 572 // CHECK: cond.end11: 573 // CHECK-NEXT: [[COND12:%.*]] = phi i32 [ 99, [[COND_TRUE9]] ], [ [[TMP30]], [[COND_FALSE10]] ] 574 // CHECK-NEXT: store i32 [[COND12]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 575 // CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 576 // CHECK-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV_ASCAST]], align 4 577 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] 578 // CHECK: omp.inner.for.end: 579 // CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 580 // CHECK: 
omp.loop.exit: 581 // CHECK-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 582 // CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 583 // CHECK-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP33]]) 584 // CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 585 // CHECK-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 586 // CHECK-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 587 // CHECK: .omp.lastprivate.then: 588 // CHECK-NEXT: store i32 10, ptr [[J3_ASCAST]], align 4 589 // CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[J3_ASCAST]], align 4 590 // CHECK-NEXT: store i32 [[TMP36]], ptr [[J_ADDR_ASCAST]], align 4 591 // CHECK-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 592 // CHECK: .omp.lastprivate.done: 593 // CHECK-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 594 // CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 595 // CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 596 // CHECK-NEXT: store ptr [[SUM1_ASCAST]], ptr [[TMP39]], align 8 597 // CHECK-NEXT: [[TMP40:%.*]] = load ptr, ptr addrspace(1) @"_openmp_teams_reductions_buffer_$_$ptr", align 8 598 // CHECK-NEXT: [[TMP41:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP38]], ptr [[TMP40]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func.3, ptr @_omp_reduction_inter_warp_copy_func.4, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) 599 // CHECK-NEXT: [[TMP42:%.*]] = icmp eq i32 [[TMP41]], 1 600 // CHECK-NEXT: br i1 [[TMP42]], label [[DOTOMP_REDUCTION_THEN:%.*]], label 
[[DOTOMP_REDUCTION_DONE:%.*]] 601 // CHECK: .omp.reduction.then: 602 // CHECK-NEXT: [[TMP43:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 603 // CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP43]] 604 // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 605 // CHECK: omp.arraycpy.body: 606 // CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1_ASCAST]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 607 // CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST13:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] 608 // CHECK-NEXT: [[TMP44:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4 609 // CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 610 // CHECK-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] 611 // CHECK-NEXT: store i32 [[ADD14]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4 612 // CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i32 1 613 // CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 614 // CHECK-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP43]] 615 // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] 616 // CHECK: omp.arraycpy.done17: 617 // CHECK-NEXT: br label [[DOTOMP_REDUCTION_DONE]] 618 // CHECK: .omp.reduction.done: 619 // CHECK-NEXT: ret void 620 // CHECK-LABEL: define {{[^@]+}}@__omp_outlined__.1 621 // CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { 622 // 
CHECK-NEXT: entry: 623 // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 624 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 625 // CHECK-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) 626 // CHECK-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) 627 // CHECK-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5) 628 // CHECK-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 629 // CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) 630 // CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) 631 // CHECK-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5) 632 // CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5) 633 // CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5) 634 // CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) 635 // CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) 636 // CHECK-NEXT: [[J3:%.*]] = alloca i32, align 4, addrspace(5) 637 // CHECK-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) 638 // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) 639 // CHECK-NEXT: [[J5:%.*]] = alloca i32, align 4, addrspace(5) 640 // CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) 641 // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr 642 // CHECK-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr 643 // CHECK-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr 644 // CHECK-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr 645 // CHECK-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr 646 // CHECK-NEXT: [[SUM_ADDR_ASCAST:%.*]] = 
addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr 647 // CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr 648 // CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr 649 // CHECK-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr 650 // CHECK-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr 651 // CHECK-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr 652 // CHECK-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr 653 // CHECK-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr 654 // CHECK-NEXT: [[J3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J3]] to ptr 655 // CHECK-NEXT: [[SUM4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM4]] to ptr 656 // CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr 657 // CHECK-NEXT: [[J5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J5]] to ptr 658 // CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr 659 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 660 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 661 // CHECK-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 662 // CHECK-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 663 // CHECK-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8 664 // CHECK-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8 665 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8 666 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4 667 // CHECK-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4 668 // CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr 
[[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 669 // CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 670 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 671 // CHECK-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 672 // CHECK-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4 673 // CHECK-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB_ASCAST]], align 4 674 // CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 675 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 676 // CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4_ASCAST]], i32 0, i32 0, i32 0 677 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 678 // CHECK-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] 679 // CHECK-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] 680 // CHECK: omp.arrayinit.body: 681 // CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] 682 // CHECK-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 683 // CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 684 // CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] 685 // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] 686 // CHECK: omp.arrayinit.done: 687 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 688 // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 689 // CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1) 
690 // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4 691 // CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4 692 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 693 // CHECK: omp.inner.for.cond: 694 // CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] 695 // CHECK-NEXT: [[CONV6:%.*]] = sext i32 [[TMP7]] to i64 696 // CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8, !llvm.access.group [[ACC_GRP7]] 697 // CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV6]], [[TMP8]] 698 // CHECK-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 699 // CHECK: omp.inner.for.body: 700 // CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 701 // CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP9]], 10 702 // CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 703 // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 704 // CHECK-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 705 // CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 706 // CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 707 // CHECK-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP11]], 10 708 // CHECK-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10 709 // CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], [[MUL8]] 710 // CHECK-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1 711 // CHECK-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] 712 // CHECK-NEXT: store i32 [[ADD10]], ptr [[J3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 713 // CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 714 // CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 715 // CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 
[[TMP13]] to i64 716 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4_ASCAST]], i64 0, i64 [[IDXPROM]] 717 // CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[J3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 718 // CHECK-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP14]] to i64 719 // CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] 720 // CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP7]] 721 // CHECK-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP15]], [[TMP12]] 722 // CHECK-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP7]] 723 // CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 724 // CHECK: omp.body.continue: 725 // CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 726 // CHECK: omp.inner.for.inc: 727 // CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 728 // CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 729 // CHECK-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] 730 // CHECK-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 731 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] 732 // CHECK: omp.inner.for.end: 733 // CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 734 // CHECK: omp.loop.exit: 735 // CHECK-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 736 // CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 737 // CHECK-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP19]]) 738 // CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 739 // CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 740 // CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 741 // CHECK-NEXT: store ptr [[SUM4_ASCAST]], ptr [[TMP22]], align 8 742 // CHECK-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP21]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func) 743 // CHECK-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP23]], 1 744 // CHECK-NEXT: br i1 [[TMP24]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] 745 // CHECK: .omp.reduction.then: 746 // CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 747 // CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP25]] 748 // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 749 // CHECK: omp.arraycpy.body: 750 // CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4_ASCAST]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 751 // CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ] 752 // CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 753 // CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 754 // CHECK-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] 755 // CHECK-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 756 // CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 757 // CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 758 // CHECK-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP25]] 759 // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], 
label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] 760 // CHECK: omp.arraycpy.done19: 761 // CHECK-NEXT: br label [[DOTOMP_REDUCTION_DONE]] 762 // CHECK: .omp.reduction.done: 763 // CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 764 // CHECK-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 765 // CHECK-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 766 // CHECK: .omp.lastprivate.then: 767 // CHECK-NEXT: store i32 10, ptr [[J3_ASCAST]], align 4 768 // CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[J3_ASCAST]], align 4 769 // CHECK-NEXT: store i32 [[TMP30]], ptr [[J_ADDR_ASCAST]], align 4 770 // CHECK-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 771 // CHECK: .omp.lastprivate.done: 772 // CHECK-NEXT: ret void 773 // CHECK-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func 774 // CHECK-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3:[0-9]+]] { 775 // CHECK-NEXT: entry: 776 // CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 777 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5) 778 // CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5) 779 // CHECK-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5) 780 // CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) 781 // CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) 782 // CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 783 // CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 784 // CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr 785 // CHECK-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr 786 // CHECK-NEXT: 
[[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr 787 // CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr 788 // CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 789 // CHECK-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2 790 // CHECK-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2 791 // CHECK-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2 792 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 793 // CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2 794 // CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2 795 // CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2 796 // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 797 // CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 798 // CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 799 // CHECK-NEXT: [[TMP11:%.*]] = getelementptr [10 x [10 x i32]], ptr [[TMP9]], i64 1 800 // CHECK-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] 801 // CHECK: .shuffle.pre_cond: 802 // CHECK-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[TMP23:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] 803 // CHECK-NEXT: [[TMP13:%.*]] = phi ptr [ [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], [[ENTRY]] ], [ [[TMP24:%.*]], [[DOTSHUFFLE_THEN]] ] 804 // CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP11]] to i64 805 // CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP12]] to i64 806 // CHECK-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], [[TMP15]] 807 // CHECK-NEXT: [[TMP17:%.*]] = sdiv exact i64 [[TMP16]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) 808 // CHECK-NEXT: [[TMP18:%.*]] = icmp sgt i64 [[TMP17]], 7 809 // CHECK-NEXT: br i1 [[TMP18]], label 
[[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]] 810 // CHECK: .shuffle.then: 811 // CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP12]], align 4 812 // CHECK-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_get_warp_size() 813 // CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 814 // CHECK-NEXT: [[TMP22:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP19]], i16 [[TMP6]], i16 [[TMP21]]) 815 // CHECK-NEXT: store i64 [[TMP22]], ptr [[TMP13]], align 4 816 // CHECK-NEXT: [[TMP23]] = getelementptr i64, ptr [[TMP12]], i64 1 817 // CHECK-NEXT: [[TMP24]] = getelementptr i64, ptr [[TMP13]], i64 1 818 // CHECK-NEXT: br label [[DOTSHUFFLE_PRE_COND]] 819 // CHECK: .shuffle.exit: 820 // CHECK-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8 821 // CHECK-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP7]], 0 822 // CHECK-NEXT: [[TMP26:%.*]] = icmp eq i16 [[TMP7]], 1 823 // CHECK-NEXT: [[TMP27:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] 824 // CHECK-NEXT: [[TMP28:%.*]] = and i1 [[TMP26]], [[TMP27]] 825 // CHECK-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP7]], 2 826 // CHECK-NEXT: [[TMP30:%.*]] = and i16 [[TMP5]], 1 827 // CHECK-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP30]], 0 828 // CHECK-NEXT: [[TMP32:%.*]] = and i1 [[TMP29]], [[TMP31]] 829 // CHECK-NEXT: [[TMP33:%.*]] = icmp sgt i16 [[TMP6]], 0 830 // CHECK-NEXT: [[TMP34:%.*]] = and i1 [[TMP32]], [[TMP33]] 831 // CHECK-NEXT: [[TMP35:%.*]] = or i1 [[TMP25]], [[TMP28]] 832 // CHECK-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP34]] 833 // CHECK-NEXT: br i1 [[TMP36]], label [[THEN:%.*]], label [[ELSE:%.*]] 834 // CHECK: then: 835 // CHECK-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR2]] 836 // CHECK-NEXT: br label [[IFCONT:%.*]] 837 // CHECK: else: 838 // CHECK-NEXT: br label [[IFCONT]] 839 // CHECK: ifcont: 840 // CHECK-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP7]], 1 841 // CHECK-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] 842 // CHECK-NEXT: 
[[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] 843 // CHECK-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] 844 // CHECK: then4: 845 // CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 846 // CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[TMP40]], align 8 847 // CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 848 // CHECK-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 849 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP43]], ptr align 4 [[TMP41]], i64 400, i1 false) 850 // CHECK-NEXT: br label [[IFCONT6:%.*]] 851 // CHECK: else5: 852 // CHECK-NEXT: br label [[IFCONT6]] 853 // CHECK: ifcont6: 854 // CHECK-NEXT: ret void 855 // CHECK-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func 856 // CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] { 857 // CHECK-NEXT: entry: 858 // CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 859 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) 860 // CHECK-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5) 861 // CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) 862 // CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 863 // CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 864 // CHECK-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr 865 // CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 866 // CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 867 // CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 868 // CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 869 // CHECK-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 63 870 // CHECK-NEXT: 
[[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 871 // CHECK-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 6 872 // CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 873 // CHECK-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4 874 // CHECK-NEXT: br label [[PRECOND:%.*]] 875 // CHECK: precond: 876 // CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4 877 // CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 100 878 // CHECK-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]] 879 // CHECK: body: 880 // CHECK-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4:[0-9]+]] to ptr), i32 [[TMP2]]) 881 // CHECK-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 882 // CHECK-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] 883 // CHECK: then: 884 // CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 885 // CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 886 // CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]] 887 // CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] 888 // CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 889 // CHECK-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4 890 // CHECK-NEXT: br label [[IFCONT:%.*]] 891 // CHECK: else: 892 // CHECK-NEXT: br label [[IFCONT]] 893 // CHECK: ifcont: 894 // CHECK-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]]) 895 // CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 896 // CHECK-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]] 897 // CHECK-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] 898 // CHECK: then2: 899 // CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds 
[64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] 900 // CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 901 // CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 902 // CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]] 903 // CHECK-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4 904 // CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 905 // CHECK-NEXT: br label [[IFCONT4:%.*]] 906 // CHECK: else3: 907 // CHECK-NEXT: br label [[IFCONT4]] 908 // CHECK: ifcont4: 909 // CHECK-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1 910 // CHECK-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR_ASCAST]], align 4 911 // CHECK-NEXT: br label [[PRECOND]] 912 // CHECK: exit: 913 // CHECK-NEXT: ret void 914 // CHECK-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func.3 915 // CHECK-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3]] { 916 // CHECK-NEXT: entry: 917 // CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 918 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5) 919 // CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5) 920 // CHECK-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5) 921 // CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) 922 // CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) 923 // CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 924 // CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 925 // CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr 926 // CHECK-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr 927 // 
CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr 928 // CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr 929 // CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 930 // CHECK-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2 931 // CHECK-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2 932 // CHECK-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2 933 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 934 // CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2 935 // CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2 936 // CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2 937 // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 938 // CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 939 // CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 940 // CHECK-NEXT: [[TMP11:%.*]] = getelementptr [10 x [10 x i32]], ptr [[TMP9]], i64 1 941 // CHECK-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] 942 // CHECK: .shuffle.pre_cond: 943 // CHECK-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[TMP23:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] 944 // CHECK-NEXT: [[TMP13:%.*]] = phi ptr [ [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], [[ENTRY]] ], [ [[TMP24:%.*]], [[DOTSHUFFLE_THEN]] ] 945 // CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP11]] to i64 946 // CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP12]] to i64 947 // CHECK-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], [[TMP15]] 948 // CHECK-NEXT: [[TMP17:%.*]] = sdiv exact i64 [[TMP16]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) 949 // CHECK-NEXT: [[TMP18:%.*]] = icmp sgt i64 [[TMP17]], 7 950 // CHECK-NEXT: br i1 [[TMP18]], 
label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]] 951 // CHECK: .shuffle.then: 952 // CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP12]], align 4 953 // CHECK-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_get_warp_size() 954 // CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 955 // CHECK-NEXT: [[TMP22:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP19]], i16 [[TMP6]], i16 [[TMP21]]) 956 // CHECK-NEXT: store i64 [[TMP22]], ptr [[TMP13]], align 4 957 // CHECK-NEXT: [[TMP23]] = getelementptr i64, ptr [[TMP12]], i64 1 958 // CHECK-NEXT: [[TMP24]] = getelementptr i64, ptr [[TMP13]], i64 1 959 // CHECK-NEXT: br label [[DOTSHUFFLE_PRE_COND]] 960 // CHECK: .shuffle.exit: 961 // CHECK-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8 962 // CHECK-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP7]], 0 963 // CHECK-NEXT: [[TMP26:%.*]] = icmp eq i16 [[TMP7]], 1 964 // CHECK-NEXT: [[TMP27:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] 965 // CHECK-NEXT: [[TMP28:%.*]] = and i1 [[TMP26]], [[TMP27]] 966 // CHECK-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP7]], 2 967 // CHECK-NEXT: [[TMP30:%.*]] = and i16 [[TMP5]], 1 968 // CHECK-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP30]], 0 969 // CHECK-NEXT: [[TMP32:%.*]] = and i1 [[TMP29]], [[TMP31]] 970 // CHECK-NEXT: [[TMP33:%.*]] = icmp sgt i16 [[TMP6]], 0 971 // CHECK-NEXT: [[TMP34:%.*]] = and i1 [[TMP32]], [[TMP33]] 972 // CHECK-NEXT: [[TMP35:%.*]] = or i1 [[TMP25]], [[TMP28]] 973 // CHECK-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP34]] 974 // CHECK-NEXT: br i1 [[TMP36]], label [[THEN:%.*]], label [[ELSE:%.*]] 975 // CHECK: then: 976 // CHECK-NEXT: call void @"_omp$reduction$reduction_func.2"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR2]] 977 // CHECK-NEXT: br label [[IFCONT:%.*]] 978 // CHECK: else: 979 // CHECK-NEXT: br label [[IFCONT]] 980 // CHECK: ifcont: 981 // CHECK-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP7]], 1 982 // CHECK-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] 983 // 
CHECK-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] 984 // CHECK-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] 985 // CHECK: then4: 986 // CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 987 // CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[TMP40]], align 8 988 // CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 989 // CHECK-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 990 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP43]], ptr align 4 [[TMP41]], i64 400, i1 false) 991 // CHECK-NEXT: br label [[IFCONT6:%.*]] 992 // CHECK: else5: 993 // CHECK-NEXT: br label [[IFCONT6]] 994 // CHECK: ifcont6: 995 // CHECK-NEXT: ret void 996 // CHECK-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func.4 997 // CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] { 998 // CHECK-NEXT: entry: 999 // CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1000 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) 1001 // CHECK-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5) 1002 // CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) 1003 // CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1004 // CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1005 // CHECK-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr 1006 // CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1007 // CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 1008 // CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1009 // CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1010 // CHECK-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 
63 1011 // CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1012 // CHECK-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 6 1013 // CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 1014 // CHECK-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4 1015 // CHECK-NEXT: br label [[PRECOND:%.*]] 1016 // CHECK: precond: 1017 // CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4 1018 // CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 100 1019 // CHECK-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]] 1020 // CHECK: body: 1021 // CHECK-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]]) 1022 // CHECK-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 1023 // CHECK-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] 1024 // CHECK: then: 1025 // CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 1026 // CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 1027 // CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]] 1028 // CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] 1029 // CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 1030 // CHECK-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4 1031 // CHECK-NEXT: br label [[IFCONT:%.*]] 1032 // CHECK: else: 1033 // CHECK-NEXT: br label [[IFCONT]] 1034 // CHECK: ifcont: 1035 // CHECK-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]]) 1036 // CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 1037 // CHECK-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]] 1038 // CHECK-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] 1039 // CHECK: then2: 1040 // 
CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] 1041 // CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 1042 // CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 1043 // CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]] 1044 // CHECK-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4 1045 // CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 1046 // CHECK-NEXT: br label [[IFCONT4:%.*]] 1047 // CHECK: else3: 1048 // CHECK-NEXT: br label [[IFCONT4]] 1049 // CHECK: ifcont4: 1050 // CHECK-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1 1051 // CHECK-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR_ASCAST]], align 4 1052 // CHECK-NEXT: br label [[PRECOND]] 1053 // CHECK: exit: 1054 // CHECK-NEXT: ret void 1055 // CHECK-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func 1056 // CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { 1057 // CHECK-NEXT: entry: 1058 // CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1059 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) 1060 // CHECK-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) 1061 // CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1062 // CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1063 // CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr 1064 // CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1065 // CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 1066 // CHECK-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 1067 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 1068 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr 
[[DOTADDR_ASCAST]], align 8 1069 // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 1070 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 1071 // CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 1072 // CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 1073 // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP5]] 1074 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP8]], ptr align 4 [[TMP7]], i64 400, i1 false) 1075 // CHECK-NEXT: ret void 1076 // CHECK-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func 1077 // CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { 1078 // CHECK-NEXT: entry: 1079 // CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1080 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) 1081 // CHECK-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) 1082 // CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) 1083 // CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1084 // CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1085 // CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr 1086 // CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr 1087 // CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1088 // CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 1089 // CHECK-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 1090 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 1091 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 1092 // 
CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 1093 // CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 1094 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP4]] 1095 // CHECK-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 1096 // CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 1097 // CHECK-NEXT: call void @"_omp$reduction$reduction_func.2"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP7]]) #[[ATTR2]] 1098 // CHECK-NEXT: ret void 1099 // CHECK-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func 1100 // CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { 1101 // CHECK-NEXT: entry: 1102 // CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1103 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) 1104 // CHECK-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) 1105 // CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1106 // CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1107 // CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr 1108 // CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1109 // CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 1110 // CHECK-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 1111 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 1112 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 1113 // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 1114 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 1115 // CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 
1116 // CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 1117 // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP5]] 1118 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 128 [[TMP8]], i64 400, i1 false) 1119 // CHECK-NEXT: ret void 1120 // CHECK-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func 1121 // CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { 1122 // CHECK-NEXT: entry: 1123 // CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1124 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) 1125 // CHECK-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) 1126 // CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) 1127 // CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1128 // CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1129 // CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr 1130 // CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr 1131 // CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1132 // CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 1133 // CHECK-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 1134 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 1135 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 1136 // CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 1137 // CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 1138 // CHECK-NEXT: [[TMP6:%.*]] = 
getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP4]] 1139 // CHECK-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 1140 // CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 1141 // CHECK-NEXT: call void @"_omp$reduction$reduction_func.2"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR2]] 1142 // CHECK-NEXT: ret void 1143 // IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22 1144 // IR-GPU-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR0:[0-9]+]] { 1145 // IR-GPU-NEXT: entry: 1146 // IR-GPU-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1147 // IR-GPU-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5) 1148 // IR-GPU-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1149 // IR-GPU-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8, addrspace(5) 1150 // IR-GPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5) 1151 // IR-GPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5) 1152 // IR-GPU-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr 1153 // IR-GPU-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr 1154 // IR-GPU-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr 1155 // IR-GPU-NEXT: [[J_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_CASTED]] to ptr 1156 // IR-GPU-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr 1157 // IR-GPU-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr 1158 // IR-GPU-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 1159 // IR-GPU-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8 1160 // IR-GPU-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8 1161 // IR-GPU-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8 1162 // IR-GPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_kernel_environment to ptr), ptr [[DYN_PTR]]) 1163 // IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 1164 // IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 1165 // IR-GPU: user_code.entry: 1166 // IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) 1167 // IR-GPU-NEXT: [[TMP3:%.*]] = load i32, ptr [[J_ADDR_ASCAST]], align 4 1168 // IR-GPU-NEXT: store i32 [[TMP3]], ptr [[J_CASTED_ASCAST]], align 4 1169 // IR-GPU-NEXT: [[TMP4:%.*]] = load i64, ptr [[J_CASTED_ASCAST]], align 8 1170 // IR-GPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4 1171 // IR-GPU-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4 1172 // IR-GPU-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]] 1173 // IR-GPU-NEXT: call void @__kmpc_target_deinit() 1174 // IR-GPU-NEXT: ret void 1175 // IR-GPU: worker.exit: 1176 // IR-GPU-NEXT: ret void 1177 // 1178 // 1179 // IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined 1180 // IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] { 1181 // IR-GPU-NEXT: entry: 1182 // IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1183 // IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1184 // IR-GPU-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5) 1185 // IR-GPU-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, 
addrspace(5) 1186 // IR-GPU-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) 1187 // IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) 1188 // IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) 1189 // IR-GPU-NEXT: [[_TMP2:%.*]] = alloca i32, align 4, addrspace(5) 1190 // IR-GPU-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5) 1191 // IR-GPU-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5) 1192 // IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) 1193 // IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) 1194 // IR-GPU-NEXT: [[J3:%.*]] = alloca i32, align 4, addrspace(5) 1195 // IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) 1196 // IR-GPU-NEXT: [[J4:%.*]] = alloca i32, align 4, addrspace(5) 1197 // IR-GPU-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8, addrspace(5) 1198 // IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5) 1199 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) 1200 // IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr 1201 // IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr 1202 // IR-GPU-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr 1203 // IR-GPU-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr 1204 // IR-GPU-NEXT: [[SUM1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM1]] to ptr 1205 // IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr 1206 // IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr 1207 // IR-GPU-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr 1208 // IR-GPU-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] 
to ptr 1209 // IR-GPU-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr 1210 // IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr 1211 // IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr 1212 // IR-GPU-NEXT: [[J3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J3]] to ptr 1213 // IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr 1214 // IR-GPU-NEXT: [[J4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J4]] to ptr 1215 // IR-GPU-NEXT: [[J_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_CASTED]] to ptr 1216 // IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr 1217 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr 1218 // IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 1219 // IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 1220 // IR-GPU-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8 1221 // IR-GPU-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8 1222 // IR-GPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8 1223 // IR-GPU-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1_ASCAST]], i32 0, i32 0, i32 0 1224 // IR-GPU-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 1225 // IR-GPU-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] 1226 // IR-GPU-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] 1227 // IR-GPU: omp.arrayinit.body: 1228 // IR-GPU-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] 1229 // IR-GPU-NEXT: 
store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 1230 // IR-GPU-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 1231 // IR-GPU-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]] 1232 // IR-GPU-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] 1233 // IR-GPU: omp.arrayinit.done: 1234 // IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 1235 // IR-GPU-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 1236 // IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 1237 // IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 1238 // IR-GPU-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1239 // IR-GPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 1240 // IR-GPU-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 1241 // IR-GPU-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]]) 1242 // IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 1243 // IR-GPU-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 1244 // IR-GPU-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 1245 // IR-GPU: cond.true: 1246 // IR-GPU-NEXT: br label [[COND_END:%.*]] 1247 // IR-GPU: cond.false: 1248 // IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 1249 // IR-GPU-NEXT: br label [[COND_END]] 1250 // IR-GPU: cond.end: 1251 // IR-GPU-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] 1252 // IR-GPU-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 1253 // IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB_ASCAST]], align 4 1254 // IR-GPU-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4 1255 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 1256 // IR-GPU: omp.inner.for.cond: 1257 // IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 1258 // IR-GPU-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP7]], 100 1259 // IR-GPU-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 1260 // IR-GPU: omp.inner.for.body: 1261 // IR-GPU-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 1262 // IR-GPU-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 1263 // IR-GPU-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 1264 // IR-GPU-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 1265 // IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[J3_ASCAST]], align 4 1266 // IR-GPU-NEXT: store i32 [[TMP12]], ptr [[J_CASTED_ASCAST]], align 4 1267 // IR-GPU-NEXT: [[TMP13:%.*]] = load i64, ptr [[J_CASTED_ASCAST]], align 8 1268 // IR-GPU-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 1269 // IR-GPU-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP9]] to ptr 1270 // IR-GPU-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8 1271 // IR-GPU-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 1272 // IR-GPU-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP11]] to ptr 1273 // IR-GPU-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 1274 // IR-GPU-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 1275 // IR-GPU-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP13]] to ptr 1276 // IR-GPU-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8 1277 // IR-GPU-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3 1278 // IR-GPU-NEXT: store ptr [[SUM1_ASCAST]], ptr [[TMP20]], align 8 1279 // IR-GPU-NEXT: [[TMP21:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 1280 // IR-GPU-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 1281 // IR-GPU-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP22]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4) 1282 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 1283 // IR-GPU: omp.inner.for.inc: 1284 // IR-GPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 1285 // IR-GPU-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 1286 // IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] 1287 // IR-GPU-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4 1288 // IR-GPU-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 1289 // IR-GPU-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 1290 // IR-GPU-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] 1291 // IR-GPU-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 1292 // IR-GPU-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 1293 // IR-GPU-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 1294 // IR-GPU-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] 1295 // IR-GPU-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 1296 // IR-GPU-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 1297 // IR-GPU-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP29]], 99 1298 // IR-GPU-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] 1299 // IR-GPU: cond.true9: 1300 // IR-GPU-NEXT: br label [[COND_END11:%.*]] 1301 // IR-GPU: cond.false10: 1302 // IR-GPU-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 1303 // IR-GPU-NEXT: br label [[COND_END11]] 1304 // IR-GPU: cond.end11: 1305 // IR-GPU-NEXT: [[COND12:%.*]] = phi i32 [ 99, 
[[COND_TRUE9]] ], [ [[TMP30]], [[COND_FALSE10]] ] 1306 // IR-GPU-NEXT: store i32 [[COND12]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 1307 // IR-GPU-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 1308 // IR-GPU-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV_ASCAST]], align 4 1309 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]] 1310 // IR-GPU: omp.inner.for.end: 1311 // IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 1312 // IR-GPU: omp.loop.exit: 1313 // IR-GPU-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 1314 // IR-GPU-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 1315 // IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP33]]) 1316 // IR-GPU-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 1317 // IR-GPU-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 1318 // IR-GPU-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 1319 // IR-GPU: .omp.lastprivate.then: 1320 // IR-GPU-NEXT: store i32 10, ptr [[J3_ASCAST]], align 4 1321 // IR-GPU-NEXT: [[TMP36:%.*]] = load i32, ptr [[J3_ASCAST]], align 4 1322 // IR-GPU-NEXT: store i32 [[TMP36]], ptr [[J_ADDR_ASCAST]], align 4 1323 // IR-GPU-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 1324 // IR-GPU: .omp.lastprivate.done: 1325 // IR-GPU-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 1326 // IR-GPU-NEXT: store ptr [[SUM1_ASCAST]], ptr [[TMP37]], align 8 1327 // IR-GPU-NEXT: %"_openmp_teams_reductions_buffer_$_$ptr" = call ptr @__kmpc_reduction_get_fixed_buffer() 1328 // IR-GPU-NEXT: [[TMP38:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 1024, i64 400, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func.1, ptr 
@_omp_reduction_inter_warp_copy_func.2, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) 1329 // IR-GPU-NEXT: [[TMP39:%.*]] = icmp eq i32 [[TMP38]], 1 1330 // IR-GPU-NEXT: br i1 [[TMP39]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] 1331 // IR-GPU: .omp.reduction.then: 1332 // IR-GPU-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 1333 // IR-GPU-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP40]] 1334 // IR-GPU-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 1335 // IR-GPU: omp.arraycpy.body: 1336 // IR-GPU-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1_ASCAST]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 1337 // IR-GPU-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST13:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] 1338 // IR-GPU-NEXT: [[TMP41:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4 1339 // IR-GPU-NEXT: [[TMP42:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 1340 // IR-GPU-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] 1341 // IR-GPU-NEXT: store i32 [[ADD14]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4 1342 // IR-GPU-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i32 1 1343 // IR-GPU-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 1344 // IR-GPU-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP40]] 1345 // IR-GPU-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] 1346 // IR-GPU: omp.arraycpy.done17: 1347 // IR-GPU-NEXT: br label [[DOTOMP_REDUCTION_DONE]] 1348 
// IR-GPU: .omp.reduction.done: 1349 // IR-GPU-NEXT: ret void 1350 // 1351 // 1352 // IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined_omp_outlined 1353 // IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { 1354 // IR-GPU-NEXT: entry: 1355 // IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1356 // IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1357 // IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) 1358 // IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) 1359 // IR-GPU-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5) 1360 // IR-GPU-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1361 // IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) 1362 // IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) 1363 // IR-GPU-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5) 1364 // IR-GPU-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5) 1365 // IR-GPU-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5) 1366 // IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) 1367 // IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) 1368 // IR-GPU-NEXT: [[J3:%.*]] = alloca i32, align 4, addrspace(5) 1369 // IR-GPU-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) 1370 // IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) 1371 // IR-GPU-NEXT: [[J5:%.*]] = alloca i32, align 4, addrspace(5) 1372 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) 1373 // IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[DOTGLOBAL_TID__ADDR]] to ptr 1374 // IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr 1375 // IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr 1376 // IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr 1377 // IR-GPU-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr 1378 // IR-GPU-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr 1379 // IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr 1380 // IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr 1381 // IR-GPU-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr 1382 // IR-GPU-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr 1383 // IR-GPU-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr 1384 // IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr 1385 // IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr 1386 // IR-GPU-NEXT: [[J3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J3]] to ptr 1387 // IR-GPU-NEXT: [[SUM4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM4]] to ptr 1388 // IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr 1389 // IR-GPU-NEXT: [[J5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J5]] to ptr 1390 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr 1391 // IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 1392 // IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 1393 // IR-GPU-NEXT: store i64 [[DOTPREVIOUS_LB_]], 
ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 1394 // IR-GPU-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 1395 // IR-GPU-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8 1396 // IR-GPU-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8 1397 // IR-GPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8 1398 // IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4 1399 // IR-GPU-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4 1400 // IR-GPU-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 1401 // IR-GPU-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 1402 // IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 1403 // IR-GPU-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 1404 // IR-GPU-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4 1405 // IR-GPU-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB_ASCAST]], align 4 1406 // IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 1407 // IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 1408 // IR-GPU-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4_ASCAST]], i32 0, i32 0, i32 0 1409 // IR-GPU-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 1410 // IR-GPU-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] 1411 // IR-GPU-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] 1412 // IR-GPU: omp.arrayinit.body: 1413 // IR-GPU-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] 1414 // IR-GPU-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 1415 // IR-GPU-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 1416 // IR-GPU-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] 1417 // IR-GPU-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] 1418 // IR-GPU: omp.arrayinit.done: 1419 // IR-GPU-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 1420 // IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 1421 // IR-GPU-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1) 1422 // IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4 1423 // IR-GPU-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4 1424 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 1425 // IR-GPU: omp.inner.for.cond: 1426 // IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] 1427 // IR-GPU-NEXT: [[CONV6:%.*]] = sext i32 [[TMP7]] to i64 1428 // IR-GPU-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8, !llvm.access.group [[ACC_GRP7]] 1429 // IR-GPU-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV6]], [[TMP8]] 1430 // IR-GPU-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 1431 // IR-GPU: omp.inner.for.body: 1432 // IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 1433 // IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP9]], 10 1434 // IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 1435 // IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 1436 // IR-GPU-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 1437 // IR-GPU-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 1438 // IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 1439 // IR-GPU-NEXT: 
[[DIV7:%.*]] = sdiv i32 [[TMP11]], 10 1440 // IR-GPU-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10 1441 // IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], [[MUL8]] 1442 // IR-GPU-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1 1443 // IR-GPU-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] 1444 // IR-GPU-NEXT: store i32 [[ADD10]], ptr [[J3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 1445 // IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 1446 // IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 1447 // IR-GPU-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 1448 // IR-GPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4_ASCAST]], i64 0, i64 [[IDXPROM]] 1449 // IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[J3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 1450 // IR-GPU-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP14]] to i64 1451 // IR-GPU-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] 1452 // IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP7]] 1453 // IR-GPU-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP15]], [[TMP12]] 1454 // IR-GPU-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP7]] 1455 // IR-GPU-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 1456 // IR-GPU: omp.body.continue: 1457 // IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 1458 // IR-GPU: omp.inner.for.inc: 1459 // IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 1460 // IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 1461 // IR-GPU-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] 1462 // IR-GPU-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] 1463 // IR-GPU-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] 1464 // IR-GPU: omp.inner.for.end: 1465 // IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 1466 // IR-GPU: omp.loop.exit: 1467 // IR-GPU-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 1468 // IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 1469 // IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP19]]) 1470 // IR-GPU-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 1471 // IR-GPU-NEXT: store ptr [[SUM4_ASCAST]], ptr [[TMP20]], align 8 1472 // IR-GPU-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i64 400, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func) 1473 // IR-GPU-NEXT: [[TMP22:%.*]] = icmp eq i32 [[TMP21]], 1 1474 // IR-GPU-NEXT: br i1 [[TMP22]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] 1475 // IR-GPU: .omp.reduction.then: 1476 // IR-GPU-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 1477 // IR-GPU-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP23]] 1478 // IR-GPU-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 1479 // IR-GPU: omp.arraycpy.body: 1480 // IR-GPU-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4_ASCAST]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 1481 // IR-GPU-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ] 1482 // IR-GPU-NEXT: [[TMP24:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 1483 // IR-GPU-NEXT: [[TMP25:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 
1484 // IR-GPU-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] 1485 // IR-GPU-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 1486 // IR-GPU-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 1487 // IR-GPU-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 1488 // IR-GPU-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP23]] 1489 // IR-GPU-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] 1490 // IR-GPU: omp.arraycpy.done19: 1491 // IR-GPU-NEXT: br label [[DOTOMP_REDUCTION_DONE]] 1492 // IR-GPU: .omp.reduction.done: 1493 // IR-GPU-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 1494 // IR-GPU-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 1495 // IR-GPU-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 1496 // IR-GPU: .omp.lastprivate.then: 1497 // IR-GPU-NEXT: store i32 10, ptr [[J3_ASCAST]], align 4 1498 // IR-GPU-NEXT: [[TMP28:%.*]] = load i32, ptr [[J3_ASCAST]], align 4 1499 // IR-GPU-NEXT: store i32 [[TMP28]], ptr [[J_ADDR_ASCAST]], align 4 1500 // IR-GPU-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 1501 // IR-GPU: .omp.lastprivate.done: 1502 // IR-GPU-NEXT: ret void 1503 // 1504 // 1505 // IR-GPU-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func 1506 // IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3:[0-9]+]] { 1507 // IR-GPU-NEXT: entry: 1508 // IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1509 // IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5) 1510 // IR-GPU-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5) 1511 // IR-GPU-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5) 1512 // IR-GPU-NEXT: 
[[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) 1513 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) 1514 // IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1515 // IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1516 // IR-GPU-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr 1517 // IR-GPU-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr 1518 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr 1519 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr 1520 // IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1521 // IR-GPU-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2 1522 // IR-GPU-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2 1523 // IR-GPU-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2 1524 // IR-GPU-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 1525 // IR-GPU-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2 1526 // IR-GPU-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2 1527 // IR-GPU-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2 1528 // IR-GPU-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 1529 // IR-GPU-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 1530 // IR-GPU-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 1531 // IR-GPU-NEXT: [[TMP11:%.*]] = getelementptr [10 x [10 x i32]], ptr [[TMP9]], i64 1 1532 // IR-GPU-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] 1533 // IR-GPU: .shuffle.pre_cond: 1534 // IR-GPU-NEXT: [[TMP12:%.*]] = phi 
ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[TMP23:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] 1535 // IR-GPU-NEXT: [[TMP13:%.*]] = phi ptr [ [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], [[ENTRY]] ], [ [[TMP24:%.*]], [[DOTSHUFFLE_THEN]] ] 1536 // IR-GPU-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP11]] to i64 1537 // IR-GPU-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP12]] to i64 1538 // IR-GPU-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], [[TMP15]] 1539 // IR-GPU-NEXT: [[TMP17:%.*]] = sdiv exact i64 [[TMP16]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) 1540 // IR-GPU-NEXT: [[TMP18:%.*]] = icmp sgt i64 [[TMP17]], 7 1541 // IR-GPU-NEXT: br i1 [[TMP18]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]] 1542 // IR-GPU: .shuffle.then: 1543 // IR-GPU-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP12]], align 4 1544 // IR-GPU-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_get_warp_size() 1545 // IR-GPU-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 1546 // IR-GPU-NEXT: [[TMP22:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP19]], i16 [[TMP6]], i16 [[TMP21]]) 1547 // IR-GPU-NEXT: store i64 [[TMP22]], ptr [[TMP13]], align 4 1548 // IR-GPU-NEXT: [[TMP23]] = getelementptr i64, ptr [[TMP12]], i64 1 1549 // IR-GPU-NEXT: [[TMP24]] = getelementptr i64, ptr [[TMP13]], i64 1 1550 // IR-GPU-NEXT: br label [[DOTSHUFFLE_PRE_COND]] 1551 // IR-GPU: .shuffle.exit: 1552 // IR-GPU-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8 1553 // IR-GPU-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP7]], 0 1554 // IR-GPU-NEXT: [[TMP26:%.*]] = icmp eq i16 [[TMP7]], 1 1555 // IR-GPU-NEXT: [[TMP27:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] 1556 // IR-GPU-NEXT: [[TMP28:%.*]] = and i1 [[TMP26]], [[TMP27]] 1557 // IR-GPU-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP7]], 2 1558 // IR-GPU-NEXT: [[TMP30:%.*]] = and i16 [[TMP5]], 1 1559 // IR-GPU-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP30]], 0 1560 // IR-GPU-NEXT: [[TMP32:%.*]] = and i1 [[TMP29]], [[TMP31]] 1561 // IR-GPU-NEXT: [[TMP33:%.*]] = icmp sgt i16 [[TMP6]], 0 1562 // 
IR-GPU-NEXT: [[TMP34:%.*]] = and i1 [[TMP32]], [[TMP33]] 1563 // IR-GPU-NEXT: [[TMP35:%.*]] = or i1 [[TMP25]], [[TMP28]] 1564 // IR-GPU-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP34]] 1565 // IR-GPU-NEXT: br i1 [[TMP36]], label [[THEN:%.*]], label [[ELSE:%.*]] 1566 // IR-GPU: then: 1567 // IR-GPU-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR2]] 1568 // IR-GPU-NEXT: br label [[IFCONT:%.*]] 1569 // IR-GPU: else: 1570 // IR-GPU-NEXT: br label [[IFCONT]] 1571 // IR-GPU: ifcont: 1572 // IR-GPU-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP7]], 1 1573 // IR-GPU-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] 1574 // IR-GPU-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] 1575 // IR-GPU-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] 1576 // IR-GPU: then4: 1577 // IR-GPU-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 1578 // IR-GPU-NEXT: [[TMP41:%.*]] = load ptr, ptr [[TMP40]], align 8 1579 // IR-GPU-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 1580 // IR-GPU-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 1581 // IR-GPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP43]], ptr align 4 [[TMP41]], i64 400, i1 false) 1582 // IR-GPU-NEXT: br label [[IFCONT6:%.*]] 1583 // IR-GPU: else5: 1584 // IR-GPU-NEXT: br label [[IFCONT6]] 1585 // IR-GPU: ifcont6: 1586 // IR-GPU-NEXT: ret void 1587 // 1588 // 1589 // IR-GPU-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func 1590 // IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] { 1591 // IR-GPU-NEXT: entry: 1592 // IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1593 // IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) 1594 // IR-GPU-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, 
align 4, addrspace(5) 1595 // IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) 1596 // IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1597 // IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1598 // IR-GPU-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr 1599 // IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1600 // IR-GPU-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 1601 // IR-GPU-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1602 // IR-GPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1603 // IR-GPU-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 63 1604 // IR-GPU-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1605 // IR-GPU-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 6 1606 // IR-GPU-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 1607 // IR-GPU-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4 1608 // IR-GPU-NEXT: br label [[PRECOND:%.*]] 1609 // IR-GPU: precond: 1610 // IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4 1611 // IR-GPU-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 100 1612 // IR-GPU-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]] 1613 // IR-GPU: body: 1614 // IR-GPU-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4:[0-9]+]] to ptr), i32 [[TMP2]]) 1615 // IR-GPU-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 1616 // IR-GPU-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] 1617 // IR-GPU: then: 1618 // IR-GPU-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 1619 // IR-GPU-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 1620 // IR-GPU-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 
[[TMP7]] 1621 // IR-GPU-NEXT: [[TMP12:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] 1622 // IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 1623 // IR-GPU-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4 1624 // IR-GPU-NEXT: br label [[IFCONT:%.*]] 1625 // IR-GPU: else: 1626 // IR-GPU-NEXT: br label [[IFCONT]] 1627 // IR-GPU: ifcont: 1628 // IR-GPU-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]]) 1629 // IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 1630 // IR-GPU-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]] 1631 // IR-GPU-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] 1632 // IR-GPU: then2: 1633 // IR-GPU-NEXT: [[TMP15:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] 1634 // IR-GPU-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 1635 // IR-GPU-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 1636 // IR-GPU-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]] 1637 // IR-GPU-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4 1638 // IR-GPU-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 1639 // IR-GPU-NEXT: br label [[IFCONT4:%.*]] 1640 // IR-GPU: else3: 1641 // IR-GPU-NEXT: br label [[IFCONT4]] 1642 // IR-GPU: ifcont4: 1643 // IR-GPU-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1 1644 // IR-GPU-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR_ASCAST]], align 4 1645 // IR-GPU-NEXT: br label [[PRECOND]] 1646 // IR-GPU: exit: 1647 // IR-GPU-NEXT: ret void 1648 // 1649 // 1650 // IR-GPU-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func.1 1651 // IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext 
[[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3]] { 1652 // IR-GPU-NEXT: entry: 1653 // IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1654 // IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5) 1655 // IR-GPU-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5) 1656 // IR-GPU-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5) 1657 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) 1658 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) 1659 // IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1660 // IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1661 // IR-GPU-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr 1662 // IR-GPU-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr 1663 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr 1664 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr 1665 // IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1666 // IR-GPU-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2 1667 // IR-GPU-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2 1668 // IR-GPU-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2 1669 // IR-GPU-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 1670 // IR-GPU-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2 1671 // IR-GPU-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2 1672 // IR-GPU-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2 1673 // IR-GPU-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 1674 // IR-GPU-NEXT: 
[[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 1675 // IR-GPU-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 1676 // IR-GPU-NEXT: [[TMP11:%.*]] = getelementptr [10 x [10 x i32]], ptr [[TMP9]], i64 1 1677 // IR-GPU-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] 1678 // IR-GPU: .shuffle.pre_cond: 1679 // IR-GPU-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[TMP23:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] 1680 // IR-GPU-NEXT: [[TMP13:%.*]] = phi ptr [ [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], [[ENTRY]] ], [ [[TMP24:%.*]], [[DOTSHUFFLE_THEN]] ] 1681 // IR-GPU-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP11]] to i64 1682 // IR-GPU-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP12]] to i64 1683 // IR-GPU-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], [[TMP15]] 1684 // IR-GPU-NEXT: [[TMP17:%.*]] = sdiv exact i64 [[TMP16]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) 1685 // IR-GPU-NEXT: [[TMP18:%.*]] = icmp sgt i64 [[TMP17]], 7 1686 // IR-GPU-NEXT: br i1 [[TMP18]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]] 1687 // IR-GPU: .shuffle.then: 1688 // IR-GPU-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP12]], align 4 1689 // IR-GPU-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_get_warp_size() 1690 // IR-GPU-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 1691 // IR-GPU-NEXT: [[TMP22:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP19]], i16 [[TMP6]], i16 [[TMP21]]) 1692 // IR-GPU-NEXT: store i64 [[TMP22]], ptr [[TMP13]], align 4 1693 // IR-GPU-NEXT: [[TMP23]] = getelementptr i64, ptr [[TMP12]], i64 1 1694 // IR-GPU-NEXT: [[TMP24]] = getelementptr i64, ptr [[TMP13]], i64 1 1695 // IR-GPU-NEXT: br label [[DOTSHUFFLE_PRE_COND]] 1696 // IR-GPU: .shuffle.exit: 1697 // IR-GPU-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8 1698 // IR-GPU-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP7]], 0 1699 // IR-GPU-NEXT: [[TMP26:%.*]] = icmp eq i16 [[TMP7]], 1 1700 // IR-GPU-NEXT: [[TMP27:%.*]] = icmp ult 
i16 [[TMP5]], [[TMP6]] 1701 // IR-GPU-NEXT: [[TMP28:%.*]] = and i1 [[TMP26]], [[TMP27]] 1702 // IR-GPU-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP7]], 2 1703 // IR-GPU-NEXT: [[TMP30:%.*]] = and i16 [[TMP5]], 1 1704 // IR-GPU-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP30]], 0 1705 // IR-GPU-NEXT: [[TMP32:%.*]] = and i1 [[TMP29]], [[TMP31]] 1706 // IR-GPU-NEXT: [[TMP33:%.*]] = icmp sgt i16 [[TMP6]], 0 1707 // IR-GPU-NEXT: [[TMP34:%.*]] = and i1 [[TMP32]], [[TMP33]] 1708 // IR-GPU-NEXT: [[TMP35:%.*]] = or i1 [[TMP25]], [[TMP28]] 1709 // IR-GPU-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP34]] 1710 // IR-GPU-NEXT: br i1 [[TMP36]], label [[THEN:%.*]], label [[ELSE:%.*]] 1711 // IR-GPU: then: 1712 // IR-GPU-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR2]] 1713 // IR-GPU-NEXT: br label [[IFCONT:%.*]] 1714 // IR-GPU: else: 1715 // IR-GPU-NEXT: br label [[IFCONT]] 1716 // IR-GPU: ifcont: 1717 // IR-GPU-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP7]], 1 1718 // IR-GPU-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] 1719 // IR-GPU-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] 1720 // IR-GPU-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] 1721 // IR-GPU: then4: 1722 // IR-GPU-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 1723 // IR-GPU-NEXT: [[TMP41:%.*]] = load ptr, ptr [[TMP40]], align 8 1724 // IR-GPU-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 1725 // IR-GPU-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 1726 // IR-GPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP43]], ptr align 4 [[TMP41]], i64 400, i1 false) 1727 // IR-GPU-NEXT: br label [[IFCONT6:%.*]] 1728 // IR-GPU: else5: 1729 // IR-GPU-NEXT: br label [[IFCONT6]] 1730 // IR-GPU: ifcont6: 1731 // IR-GPU-NEXT: ret void 1732 // 1733 // 
1734 // IR-GPU-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func.2 1735 // IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] { 1736 // IR-GPU-NEXT: entry: 1737 // IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1738 // IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) 1739 // IR-GPU-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5) 1740 // IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) 1741 // IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1742 // IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1743 // IR-GPU-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr 1744 // IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1745 // IR-GPU-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 1746 // IR-GPU-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1747 // IR-GPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1748 // IR-GPU-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 63 1749 // IR-GPU-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1750 // IR-GPU-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 6 1751 // IR-GPU-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 1752 // IR-GPU-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4 1753 // IR-GPU-NEXT: br label [[PRECOND:%.*]] 1754 // IR-GPU: precond: 1755 // IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4 1756 // IR-GPU-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 100 1757 // IR-GPU-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]] 1758 // IR-GPU: body: 1759 // IR-GPU-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]]) 1760 // IR-GPU-NEXT: 
[[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 1761 // IR-GPU-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] 1762 // IR-GPU: then: 1763 // IR-GPU-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 1764 // IR-GPU-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 1765 // IR-GPU-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]] 1766 // IR-GPU-NEXT: [[TMP12:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] 1767 // IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 1768 // IR-GPU-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4 1769 // IR-GPU-NEXT: br label [[IFCONT:%.*]] 1770 // IR-GPU: else: 1771 // IR-GPU-NEXT: br label [[IFCONT]] 1772 // IR-GPU: ifcont: 1773 // IR-GPU-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]]) 1774 // IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 1775 // IR-GPU-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]] 1776 // IR-GPU-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] 1777 // IR-GPU: then2: 1778 // IR-GPU-NEXT: [[TMP15:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] 1779 // IR-GPU-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 1780 // IR-GPU-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 1781 // IR-GPU-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]] 1782 // IR-GPU-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4 1783 // IR-GPU-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 1784 // IR-GPU-NEXT: br label [[IFCONT4:%.*]] 1785 // IR-GPU: else3: 1786 // IR-GPU-NEXT: br label [[IFCONT4]] 1787 // IR-GPU: ifcont4: 1788 // IR-GPU-NEXT: [[TMP20:%.*]] = 
add nsw i32 [[TMP7]], 1 1789 // IR-GPU-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR_ASCAST]], align 4 1790 // IR-GPU-NEXT: br label [[PRECOND]] 1791 // IR-GPU: exit: 1792 // IR-GPU-NEXT: ret void 1793 // 1794 // 1795 // IR-GPU-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func 1796 // IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { 1797 // IR-GPU-NEXT: entry: 1798 // IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1799 // IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) 1800 // IR-GPU-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) 1801 // IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1802 // IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1803 // IR-GPU-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr 1804 // IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1805 // IR-GPU-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 1806 // IR-GPU-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 1807 // IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 1808 // IR-GPU-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 1809 // IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 1810 // IR-GPU-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 1811 // IR-GPU-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 1812 // IR-GPU-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 1813 // IR-GPU-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP5]] 1814 // IR-GPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP8]], ptr align 4 [[TMP7]], i64 400, i1 false) 1815 // IR-GPU-NEXT: ret void 1816 // 1817 // 1818 // IR-GPU-LABEL: define 
{{[^@]+}}@_omp_reduction_list_to_global_reduce_func 1819 // IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { 1820 // IR-GPU-NEXT: entry: 1821 // IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1822 // IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) 1823 // IR-GPU-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) 1824 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) 1825 // IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1826 // IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1827 // IR-GPU-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr 1828 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr 1829 // IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1830 // IR-GPU-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 1831 // IR-GPU-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 1832 // IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 1833 // IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 1834 // IR-GPU-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 1835 // IR-GPU-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 1836 // IR-GPU-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP4]] 1837 // IR-GPU-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 1838 // IR-GPU-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 1839 // IR-GPU-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr 
[[TMP7]]) #[[ATTR2]] 1840 // IR-GPU-NEXT: ret void 1841 // 1842 // 1843 // IR-GPU-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func 1844 // IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { 1845 // IR-GPU-NEXT: entry: 1846 // IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1847 // IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) 1848 // IR-GPU-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) 1849 // IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1850 // IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1851 // IR-GPU-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr 1852 // IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1853 // IR-GPU-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 1854 // IR-GPU-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 1855 // IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 1856 // IR-GPU-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 1857 // IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 1858 // IR-GPU-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 1859 // IR-GPU-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 1860 // IR-GPU-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 1861 // IR-GPU-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP5]] 1862 // IR-GPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 [[TMP8]], i64 400, i1 false) 1863 // IR-GPU-NEXT: ret void 1864 // 1865 // 1866 // IR-GPU-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func 1867 // IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef 
[[TMP2:%.*]]) #[[ATTR3]] { 1868 // IR-GPU-NEXT: entry: 1869 // IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) 1870 // IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) 1871 // IR-GPU-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) 1872 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) 1873 // IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr 1874 // IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr 1875 // IR-GPU-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr 1876 // IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr 1877 // IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 1878 // IR-GPU-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 1879 // IR-GPU-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 1880 // IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 1881 // IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 1882 // IR-GPU-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 1883 // IR-GPU-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 1884 // IR-GPU-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP4]] 1885 // IR-GPU-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 1886 // IR-GPU-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 1887 // IR-GPU-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR2]] 1888 // IR-GPU-NEXT: ret void 1889 // 1890 // 1891 // IR-LABEL: define {{[^@]+}}@_Z3foov 1892 // IR-SAME: () 
#[[ATTR0:[0-9]+]] { 1893 // IR-NEXT: entry: 1894 // IR-NEXT: [[I:%.*]] = alloca i32, align 4 1895 // IR-NEXT: [[J:%.*]] = alloca i32, align 4 1896 // IR-NEXT: [[SUM:%.*]] = alloca [10 x [10 x i32]], align 16 1897 // IR-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 1898 // IR-NEXT: [[TMP0:%.*]] = load i32, ptr [[J]], align 4 1899 // IR-NEXT: store i32 [[TMP0]], ptr [[J_CASTED]], align 4 1900 // IR-NEXT: [[TMP1:%.*]] = load i64, ptr [[J_CASTED]], align 8 1901 // IR-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22(i64 [[TMP1]], ptr [[SUM]]) #[[ATTR2:[0-9]+]] 1902 // IR-NEXT: ret i32 0 1903 // 1904 // 1905 // IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22 1906 // IR-SAME: (i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] { 1907 // IR-NEXT: entry: 1908 // IR-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 1909 // IR-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 1910 // IR-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 1911 // IR-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 1912 // IR-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 1913 // IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 1914 // IR-NEXT: [[TMP1:%.*]] = load i32, ptr [[J_ADDR]], align 4 1915 // IR-NEXT: store i32 [[TMP1]], ptr [[J_CASTED]], align 4 1916 // IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[J_CASTED]], align 8 1917 // IR-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined, i64 [[TMP2]], ptr [[TMP0]]) 1918 // IR-NEXT: ret void 1919 // 1920 // 1921 // IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined 1922 // IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { 1923 // IR-NEXT: entry: 1924 // IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 1925 // IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 1926 // IR-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 1927 // IR-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 1928 // IR-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 16 1929 // IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 1930 // IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 1931 // IR-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 1932 // IR-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 1933 // IR-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 1934 // IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 1935 // IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 1936 // IR-NEXT: [[J3:%.*]] = alloca i32, align 4 1937 // IR-NEXT: [[I:%.*]] = alloca i32, align 4 1938 // IR-NEXT: [[J4:%.*]] = alloca i32, align 4 1939 // IR-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 1940 // IR-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 1941 // IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 1942 // IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 1943 // IR-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 1944 // IR-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 1945 // IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 1946 // IR-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1]], i32 0, i32 0, i32 0 1947 // 
IR-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 1948 // IR-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] 1949 // IR-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] 1950 // IR: omp.arrayinit.body: 1951 // IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] 1952 // IR-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 1953 // IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 1954 // IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]] 1955 // IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] 1956 // IR: omp.arrayinit.done: 1957 // IR-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 1958 // IR-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 1959 // IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 1960 // IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 1961 // IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 1962 // IR-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 1963 // IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 1964 // IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 1965 // IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 1966 // IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 1967 // IR: cond.true: 1968 // IR-NEXT: br label [[COND_END:%.*]] 1969 // IR: cond.false: 1970 // IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 1971 // IR-NEXT: br label [[COND_END]] 1972 // IR: cond.end: 1973 // IR-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ 
[[TMP5]], [[COND_FALSE]] ] 1974 // IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 1975 // IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 1976 // IR-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 1977 // IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 1978 // IR: omp.inner.for.cond: 1979 // IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 1980 // IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 1981 // IR-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] 1982 // IR-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 1983 // IR: omp.inner.for.body: 1984 // IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 1985 // IR-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 1986 // IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 1987 // IR-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 1988 // IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[J3]], align 4 1989 // IR-NEXT: store i32 [[TMP13]], ptr [[J_CASTED]], align 4 1990 // IR-NEXT: [[TMP14:%.*]] = load i64, ptr [[J_CASTED]], align 8 1991 // IR-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined, i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], ptr [[SUM1]]) 1992 // IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 1993 // IR: omp.inner.for.inc: 1994 // IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 1995 // IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 1996 // IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] 1997 // IR-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 1998 // IR-NEXT: br label [[OMP_INNER_FOR_COND]] 1999 // IR: omp.inner.for.end: 2000 // IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 2001 // IR: omp.loop.exit: 2002 // IR-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 2003 // IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 2004 // IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]]) 2005 // IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 2006 // IR-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 2007 // IR-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 2008 // IR: .omp.lastprivate.then: 2009 // IR-NEXT: store i32 10, ptr [[J3]], align 4 2010 // IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[J3]], align 4 2011 // IR-NEXT: store i32 [[TMP21]], ptr [[J_ADDR]], align 4 2012 // IR-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 2013 // IR: .omp.lastprivate.done: 2014 // IR-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 2015 // IR-NEXT: store ptr [[SUM1]], ptr [[TMP22]], align 8 2016 // IR-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 2017 // IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 2018 // IR-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) 2019 // IR-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ 2020 // IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] 2021 // IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] 2022 // IR-NEXT: ] 2023 // IR: .omp.reduction.case1: 2024 // IR-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 2025 // IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP26]] 2026 // IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 2027 // IR: omp.arraycpy.body: 2028 // IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2029 // IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2030 // IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 2031 // IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 2032 // IR-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] 2033 // IR-NEXT: store i32 [[ADD7]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 2034 // IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1 2035 // IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 2036 // IR-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP26]] 2037 // IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] 2038 // IR: omp.arraycpy.done10: 2039 // IR-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) 2040 // IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] 2041 // 
IR: .omp.reduction.case2: 2042 // IR-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 2043 // IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP0]], [[TMP29]] 2044 // IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] 2045 // IR: omp.arraycpy.body12: 2046 // IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] 2047 // IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY12]] ] 2048 // IR-NEXT: [[TMP30:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], align 4 2049 // IR-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 [[TMP30]] monotonic, align 4 2050 // IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 2051 // IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 2052 // IR-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP29]] 2053 // IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]] 2054 // IR: omp.arraycpy.done18: 2055 // IR-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) 2056 // IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] 2057 // IR: .omp.reduction.default: 2058 // IR-NEXT: ret void 2059 // 2060 // 2061 // IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined 2062 // IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) 
#[[ATTR1]] { 2063 // IR-NEXT: entry: 2064 // IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 2065 // IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2066 // IR-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 2067 // IR-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 2068 // IR-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 2069 // IR-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 2070 // IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 2071 // IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 2072 // IR-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 2073 // IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 2074 // IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 2075 // IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 2076 // IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 2077 // IR-NEXT: [[J3:%.*]] = alloca i32, align 4 2078 // IR-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 16 2079 // IR-NEXT: [[I:%.*]] = alloca i32, align 4 2080 // IR-NEXT: [[J5:%.*]] = alloca i32, align 4 2081 // IR-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 2082 // IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 2083 // IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 2084 // IR-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 2085 // IR-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 2086 // IR-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 2087 // IR-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 2088 // IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 2089 // IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 2090 // IR-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 2091 // IR-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 2092 // IR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 2093 // IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 2094 // IR-NEXT: [[CONV2:%.*]] 
= trunc i64 [[TMP2]] to i32 2095 // IR-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 2096 // IR-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 2097 // IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 2098 // IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 2099 // IR-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i32 0, i32 0, i32 0 2100 // IR-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 2101 // IR-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] 2102 // IR-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] 2103 // IR: omp.arrayinit.body: 2104 // IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] 2105 // IR-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 2106 // IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 2107 // IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] 2108 // IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] 2109 // IR: omp.arrayinit.done: 2110 // IR-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 2111 // IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 2112 // IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 2113 // IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 2114 // IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 2115 // IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 2116 // IR: cond.true: 2117 // IR-NEXT: br label [[COND_END:%.*]] 2118 // IR: cond.false: 2119 // IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 2120 
// IR-NEXT: br label [[COND_END]] 2121 // IR: cond.end: 2122 // IR-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] 2123 // IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 2124 // IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 2125 // IR-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 2126 // IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 2127 // IR: omp.inner.for.cond: 2128 // IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] 2129 // IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] 2130 // IR-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] 2131 // IR-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 2132 // IR: omp.inner.for.body: 2133 // IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 2134 // IR-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 10 2135 // IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 2136 // IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 2137 // IR-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] 2138 // IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 2139 // IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 2140 // IR-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP13]], 10 2141 // IR-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10 2142 // IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], [[MUL8]] 2143 // IR-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1 2144 // IR-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] 2145 // IR-NEXT: store i32 [[ADD10]], ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] 2146 // IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] 2147 // IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] 2148 // IR-NEXT: [[IDXPROM:%.*]] 
= sext i32 [[TMP15]] to i64 2149 // IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i64 0, i64 [[IDXPROM]] 2150 // IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] 2151 // IR-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64 2152 // IR-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] 2153 // IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] 2154 // IR-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP14]] 2155 // IR-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] 2156 // IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 2157 // IR: omp.body.continue: 2158 // IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 2159 // IR: omp.inner.for.inc: 2160 // IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 2161 // IR-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP18]], 1 2162 // IR-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 2163 // IR-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] 2164 // IR: omp.inner.for.end: 2165 // IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 2166 // IR: omp.loop.exit: 2167 // IR-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 2168 // IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 2169 // IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) 2170 // IR-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 2171 // IR-NEXT: store ptr [[SUM4]], ptr [[TMP21]], align 8 2172 // IR-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 2173 // IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 2174 // IR-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) 2175 // IR-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ 2176 // IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] 2177 // IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] 2178 // IR-NEXT: ] 2179 // IR: .omp.reduction.case1: 2180 // IR-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 2181 // IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP25]] 2182 // IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 2183 // IR: omp.arraycpy.body: 2184 // IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2185 // IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2186 // IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 2187 // IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 2188 // IR-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] 2189 // IR-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 2190 // IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 2191 // IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 2192 // IR-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP25]] 2193 // IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] 2194 // IR: omp.arraycpy.done19: 2195 // IR-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) 2196 // IR-NEXT: br label 
[[DOTOMP_REDUCTION_DEFAULT]] 2197 // IR: .omp.reduction.case2: 2198 // IR-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 2199 // IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq ptr [[TMP0]], [[TMP28]] 2200 // IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]] 2201 // IR: omp.arraycpy.body21: 2202 // IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY21]] ] 2203 // IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY21]] ] 2204 // IR-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], align 4 2205 // IR-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 [[TMP29]] monotonic, align 4 2206 // IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1 2207 // IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1 2208 // IR-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP28]] 2209 // IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]] 2210 // IR: omp.arraycpy.done27: 2211 // IR-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) 2212 // IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] 2213 // IR: .omp.reduction.default: 2214 // IR-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 2215 // IR-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 2216 // IR-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 2217 // IR: .omp.lastprivate.then: 2218 // IR-NEXT: store i32 10, ptr [[J3]], align 4 2219 // IR-NEXT: [[TMP33:%.*]] = load i32, ptr [[J3]], align 
4 2220 // IR-NEXT: store i32 [[TMP33]], ptr [[J_ADDR]], align 4 2221 // IR-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 2222 // IR: .omp.lastprivate.done: 2223 // IR-NEXT: ret void 2224 // 2225 // 2226 // IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func 2227 // IR-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { 2228 // IR-NEXT: entry: 2229 // IR-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 2230 // IR-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 2231 // IR-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 2232 // IR-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 2233 // IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 2234 // IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 2235 // IR-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 2236 // IR-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 2237 // IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 2238 // IR-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 2239 // IR-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 2240 // IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] 2241 // IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 2242 // IR: omp.arraycpy.body: 2243 // IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2244 // IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2245 // IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 2246 // IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 2247 // IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] 2248 // IR-NEXT: store 
i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 2249 // IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 2250 // IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 2251 // IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] 2252 // IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] 2253 // IR: omp.arraycpy.done2: 2254 // IR-NEXT: ret void 2255 // 2256 // 2257 // IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func 2258 // IR-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { 2259 // IR-NEXT: entry: 2260 // IR-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 2261 // IR-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 2262 // IR-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 2263 // IR-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 2264 // IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 2265 // IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 2266 // IR-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 2267 // IR-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 2268 // IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 2269 // IR-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 2270 // IR-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 2271 // IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] 2272 // IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 2273 // IR: omp.arraycpy.body: 2274 // IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2275 // IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ 
[[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2276 // IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 2277 // IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 2278 // IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] 2279 // IR-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 2280 // IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 2281 // IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 2282 // IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] 2283 // IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] 2284 // IR: omp.arraycpy.done2: 2285 // IR-NEXT: ret void 2286 // 2287 // 2288 // IR-PCH-LABEL: define {{[^@]+}}@_Z3foov 2289 // IR-PCH-SAME: () #[[ATTR0:[0-9]+]] { 2290 // IR-PCH-NEXT: entry: 2291 // IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 2292 // IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4 2293 // IR-PCH-NEXT: [[SUM:%.*]] = alloca [10 x [10 x i32]], align 16 2294 // IR-PCH-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 2295 // IR-PCH-NEXT: [[TMP0:%.*]] = load i32, ptr [[J]], align 4 2296 // IR-PCH-NEXT: store i32 [[TMP0]], ptr [[J_CASTED]], align 4 2297 // IR-PCH-NEXT: [[TMP1:%.*]] = load i64, ptr [[J_CASTED]], align 8 2298 // IR-PCH-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22(i64 [[TMP1]], ptr [[SUM]]) #[[ATTR2:[0-9]+]] 2299 // IR-PCH-NEXT: ret i32 0 2300 // 2301 // 2302 // IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22 2303 // IR-PCH-SAME: (i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] { 2304 // IR-PCH-NEXT: entry: 2305 // IR-PCH-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 2306 // IR-PCH-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, 
align 8 2307 // IR-PCH-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 2308 // IR-PCH-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 2309 // IR-PCH-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 2310 // IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 2311 // IR-PCH-NEXT: [[TMP1:%.*]] = load i32, ptr [[J_ADDR]], align 4 2312 // IR-PCH-NEXT: store i32 [[TMP1]], ptr [[J_CASTED]], align 4 2313 // IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[J_CASTED]], align 8 2314 // IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined, i64 [[TMP2]], ptr [[TMP0]]) 2315 // IR-PCH-NEXT: ret void 2316 // 2317 // 2318 // IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined 2319 // IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { 2320 // IR-PCH-NEXT: entry: 2321 // IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 2322 // IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2323 // IR-PCH-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 2324 // IR-PCH-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 2325 // IR-PCH-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 16 2326 // IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 2327 // IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4 2328 // IR-PCH-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 2329 // IR-PCH-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 2330 // IR-PCH-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 2331 // IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 2332 // IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 2333 // IR-PCH-NEXT: [[J3:%.*]] = alloca i32, align 4 2334 // IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 2335 // IR-PCH-NEXT: [[J4:%.*]] = alloca i32, align 4 2336 // 
IR-PCH-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 2337 // IR-PCH-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 2338 // IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 2339 // IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 2340 // IR-PCH-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 2341 // IR-PCH-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 2342 // IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 2343 // IR-PCH-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1]], i32 0, i32 0, i32 0 2344 // IR-PCH-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 2345 // IR-PCH-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] 2346 // IR-PCH-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] 2347 // IR-PCH: omp.arrayinit.body: 2348 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] 2349 // IR-PCH-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 2350 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 2351 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]] 2352 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] 2353 // IR-PCH: omp.arrayinit.done: 2354 // IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 2355 // IR-PCH-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 2356 // IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 2357 // IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 2358 // IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 2359 // IR-PCH-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 2360 // 
IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 2361 // IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 2362 // IR-PCH-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 2363 // IR-PCH-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 2364 // IR-PCH: cond.true: 2365 // IR-PCH-NEXT: br label [[COND_END:%.*]] 2366 // IR-PCH: cond.false: 2367 // IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 2368 // IR-PCH-NEXT: br label [[COND_END]] 2369 // IR-PCH: cond.end: 2370 // IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] 2371 // IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 2372 // IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 2373 // IR-PCH-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 2374 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 2375 // IR-PCH: omp.inner.for.cond: 2376 // IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 2377 // IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 2378 // IR-PCH-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] 2379 // IR-PCH-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 2380 // IR-PCH: omp.inner.for.body: 2381 // IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 2382 // IR-PCH-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 2383 // IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 2384 // IR-PCH-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 2385 // IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[J3]], align 4 2386 // IR-PCH-NEXT: store i32 [[TMP13]], ptr [[J_CASTED]], align 4 2387 // IR-PCH-NEXT: [[TMP14:%.*]] = load i64, ptr [[J_CASTED]], align 8 2388 // IR-PCH-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined, i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], ptr [[SUM1]]) 2389 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 2390 // IR-PCH: omp.inner.for.inc: 2391 // IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 2392 // IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 2393 // IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] 2394 // IR-PCH-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 2395 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]] 2396 // IR-PCH: omp.inner.for.end: 2397 // IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 2398 // IR-PCH: omp.loop.exit: 2399 // IR-PCH-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 2400 // IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 2401 // IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]]) 2402 // IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 2403 // IR-PCH-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 2404 // IR-PCH-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 2405 // IR-PCH: .omp.lastprivate.then: 2406 // IR-PCH-NEXT: store i32 10, ptr [[J3]], align 4 2407 // IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[J3]], align 4 2408 // IR-PCH-NEXT: store i32 [[TMP21]], ptr [[J_ADDR]], align 4 2409 // IR-PCH-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 2410 // IR-PCH: .omp.lastprivate.done: 2411 // IR-PCH-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 2412 // IR-PCH-NEXT: store ptr [[SUM1]], ptr [[TMP22]], align 8 2413 // IR-PCH-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 2414 // IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 2415 // IR-PCH-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], 
i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) 2416 // IR-PCH-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ 2417 // IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] 2418 // IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] 2419 // IR-PCH-NEXT: ] 2420 // IR-PCH: .omp.reduction.case1: 2421 // IR-PCH-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 2422 // IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP26]] 2423 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 2424 // IR-PCH: omp.arraycpy.body: 2425 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2426 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2427 // IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 2428 // IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 2429 // IR-PCH-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] 2430 // IR-PCH-NEXT: store i32 [[ADD7]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 2431 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1 2432 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 2433 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP26]] 2434 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] 2435 // IR-PCH: omp.arraycpy.done10: 2436 // IR-PCH-NEXT: call void 
@__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) 2437 // IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] 2438 // IR-PCH: .omp.reduction.case2: 2439 // IR-PCH-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 2440 // IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP0]], [[TMP29]] 2441 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] 2442 // IR-PCH: omp.arraycpy.body12: 2443 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] 2444 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY12]] ] 2445 // IR-PCH-NEXT: [[TMP30:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], align 4 2446 // IR-PCH-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 [[TMP30]] monotonic, align 4 2447 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 2448 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 2449 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP29]] 2450 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]] 2451 // IR-PCH: omp.arraycpy.done18: 2452 // IR-PCH-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) 2453 // IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] 2454 // IR-PCH: .omp.reduction.default: 2455 // IR-PCH-NEXT: ret void 2456 // 2457 // 2458 // IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined 2459 // IR-PCH-SAME: (ptr noalias 
noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { 2460 // IR-PCH-NEXT: entry: 2461 // IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 2462 // IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 2463 // IR-PCH-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 2464 // IR-PCH-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 2465 // IR-PCH-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 2466 // IR-PCH-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 2467 // IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 2468 // IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4 2469 // IR-PCH-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 2470 // IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 2471 // IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 2472 // IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 2473 // IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 2474 // IR-PCH-NEXT: [[J3:%.*]] = alloca i32, align 4 2475 // IR-PCH-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 16 2476 // IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 2477 // IR-PCH-NEXT: [[J5:%.*]] = alloca i32, align 4 2478 // IR-PCH-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 2479 // IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 2480 // IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 2481 // IR-PCH-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 2482 // IR-PCH-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 2483 // IR-PCH-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 2484 // IR-PCH-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 2485 // IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 2486 // IR-PCH-NEXT: store 
i32 0, ptr [[DOTOMP_LB]], align 4 2487 // IR-PCH-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 2488 // IR-PCH-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 2489 // IR-PCH-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 2490 // IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 2491 // IR-PCH-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 2492 // IR-PCH-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 2493 // IR-PCH-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 2494 // IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 2495 // IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 2496 // IR-PCH-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i32 0, i32 0, i32 0 2497 // IR-PCH-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 2498 // IR-PCH-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] 2499 // IR-PCH-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] 2500 // IR-PCH: omp.arrayinit.body: 2501 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] 2502 // IR-PCH-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 2503 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 2504 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] 2505 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] 2506 // IR-PCH: omp.arrayinit.done: 2507 // IR-PCH-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 2508 // IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 2509 // IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 2510 // IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 2511 // IR-PCH-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 2512 // IR-PCH-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 2513 // IR-PCH: cond.true: 2514 // IR-PCH-NEXT: br label [[COND_END:%.*]] 2515 // IR-PCH: cond.false: 2516 // IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 2517 // IR-PCH-NEXT: br label [[COND_END]] 2518 // IR-PCH: cond.end: 2519 // IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] 2520 // IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 2521 // IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 2522 // IR-PCH-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 2523 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 2524 // IR-PCH: omp.inner.for.cond: 2525 // IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] 2526 // IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] 2527 // IR-PCH-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] 2528 // IR-PCH-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 2529 // IR-PCH: omp.inner.for.body: 2530 // IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 2531 // IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 10 2532 // IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 2533 // IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 2534 // IR-PCH-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] 2535 // IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 2536 // IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 2537 // IR-PCH-NEXT: [[DIV7:%.*]] = sdiv i32 
[[TMP13]], 10 2538 // IR-PCH-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10 2539 // IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], [[MUL8]] 2540 // IR-PCH-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1 2541 // IR-PCH-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] 2542 // IR-PCH-NEXT: store i32 [[ADD10]], ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] 2543 // IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] 2544 // IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] 2545 // IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 2546 // IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i64 0, i64 [[IDXPROM]] 2547 // IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] 2548 // IR-PCH-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64 2549 // IR-PCH-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] 2550 // IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] 2551 // IR-PCH-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP14]] 2552 // IR-PCH-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] 2553 // IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 2554 // IR-PCH: omp.body.continue: 2555 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 2556 // IR-PCH: omp.inner.for.inc: 2557 // IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 2558 // IR-PCH-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP18]], 1 2559 // IR-PCH-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] 2560 // IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] 2561 // IR-PCH: omp.inner.for.end: 2562 // IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 2563 // IR-PCH: omp.loop.exit: 2564 // IR-PCH-NEXT: [[TMP19:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 2565 // IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 2566 // IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) 2567 // IR-PCH-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 2568 // IR-PCH-NEXT: store ptr [[SUM4]], ptr [[TMP21]], align 8 2569 // IR-PCH-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 2570 // IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 2571 // IR-PCH-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) 2572 // IR-PCH-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ 2573 // IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] 2574 // IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] 2575 // IR-PCH-NEXT: ] 2576 // IR-PCH: .omp.reduction.case1: 2577 // IR-PCH-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 2578 // IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP25]] 2579 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 2580 // IR-PCH: omp.arraycpy.body: 2581 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2582 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2583 // IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 2584 // IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 2585 // IR-PCH-NEXT: [[ADD16:%.*]] = add 
nsw i32 [[TMP26]], [[TMP27]] 2586 // IR-PCH-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 2587 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 2588 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 2589 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP25]] 2590 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] 2591 // IR-PCH: omp.arraycpy.done19: 2592 // IR-PCH-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) 2593 // IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] 2594 // IR-PCH: .omp.reduction.case2: 2595 // IR-PCH-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 2596 // IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq ptr [[TMP0]], [[TMP28]] 2597 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]] 2598 // IR-PCH: omp.arraycpy.body21: 2599 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY21]] ] 2600 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY21]] ] 2601 // IR-PCH-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], align 4 2602 // IR-PCH-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 [[TMP29]] monotonic, align 4 2603 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1 2604 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1 2605 // IR-PCH-NEXT: 
[[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP28]] 2606 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]] 2607 // IR-PCH: omp.arraycpy.done27: 2608 // IR-PCH-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) 2609 // IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] 2610 // IR-PCH: .omp.reduction.default: 2611 // IR-PCH-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 2612 // IR-PCH-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 2613 // IR-PCH-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 2614 // IR-PCH: .omp.lastprivate.then: 2615 // IR-PCH-NEXT: store i32 10, ptr [[J3]], align 4 2616 // IR-PCH-NEXT: [[TMP33:%.*]] = load i32, ptr [[J3]], align 4 2617 // IR-PCH-NEXT: store i32 [[TMP33]], ptr [[J_ADDR]], align 4 2618 // IR-PCH-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 2619 // IR-PCH: .omp.lastprivate.done: 2620 // IR-PCH-NEXT: ret void 2621 // 2622 // 2623 // IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func 2624 // IR-PCH-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { 2625 // IR-PCH-NEXT: entry: 2626 // IR-PCH-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 2627 // IR-PCH-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 2628 // IR-PCH-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 2629 // IR-PCH-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 2630 // IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 2631 // IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 2632 // IR-PCH-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 2633 // IR-PCH-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 2634 // IR-PCH-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 
0, i64 0 2635 // IR-PCH-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 2636 // IR-PCH-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 2637 // IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] 2638 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 2639 // IR-PCH: omp.arraycpy.body: 2640 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2641 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2642 // IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 2643 // IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 2644 // IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] 2645 // IR-PCH-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 2646 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 2647 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 2648 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] 2649 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] 2650 // IR-PCH: omp.arraycpy.done2: 2651 // IR-PCH-NEXT: ret void 2652 // 2653 // 2654 // IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func 2655 // IR-PCH-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { 2656 // IR-PCH-NEXT: entry: 2657 // IR-PCH-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 2658 // IR-PCH-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 2659 // IR-PCH-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 2660 
// IR-PCH-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 2661 // IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 2662 // IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 2663 // IR-PCH-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 2664 // IR-PCH-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 2665 // IR-PCH-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 2666 // IR-PCH-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 2667 // IR-PCH-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 2668 // IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] 2669 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 2670 // IR-PCH: omp.arraycpy.body: 2671 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2672 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 2673 // IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 2674 // IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 2675 // IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] 2676 // IR-PCH-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 2677 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 2678 // IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 2679 // IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] 2680 // IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] 2681 // IR-PCH: omp.arraycpy.done2: 2682 // IR-PCH-NEXT: ret 
void 2683 // 2684